1 // SPDX-License-Identifier: GPL-2.0-or-later
2 // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
3 
4 #define _GNU_SOURCE
5 #include <fcntl.h>
6 #include <sched.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <sys/stat.h>
10 #include <sys/mount.h>
11 #include <unistd.h>
12 #include <sys/syscall.h>
13 
14 #include "../../kselftest_harness.h"
15 #include "../../pidfd/pidfd.h"
16 #include "../statmount/statmount.h"
17 #include "../utils.h"
18 
// Fallback definition of __kernel_fsid_t, needed for linux/fanotify.h
// (presumably pulled in by <sys/fanotify.h> below -- TODO confirm).
// NOTE(review): #ifndef only tests for a preprocessor macro, not for a typedef
// name, so this typedef is emitted unless some header #defines
// __kernel_fsid_t -- confirm this cannot clash with the headers in use.
#ifndef __kernel_fsid_t
typedef struct {
	int	val[2];
} __kernel_fsid_t;
#endif
25 
26 #include <sys/fanotify.h>
27 
// mkdtemp() template for the directory that fixture setup mounts a tmpfs on
// and then chroots into
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";

// Mark type OR-ed into FAN_MARK_ADD for the i-th throwaway FAN_REPORT_FID
// group that fixture setup creates to probe what may be watched
static const int mark_types[] = {
	FAN_MARK_FILESYSTEM,
	FAN_MARK_MOUNT,
	FAN_MARK_INODE
};

// Command applied to the i-th FAN_REPORT_MNT group after its mntns mark has
// been added: the extra ADD changes nothing, REMOVE and FLUSH drop the mark
static const int mark_cmds[] = {
	FAN_MARK_ADD,
	FAN_MARK_REMOVE,
	FAN_MARK_FLUSH
};

// Number of fanotify groups kept in the fixture. Fixture setup uses the same
// bound to index mark_types[], so both tables must keep the same length.
#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
43 
// Per-test state shared between setup, teardown and the notification helpers
FIXTURE(fanotify) {
	// One FAN_REPORT_MNT | FAN_NONBLOCK group per mark_cmds[] entry
	int fan_fd[NUM_FAN_FDS];
	// Buffer that expect_notify() reads events from group 0 into
	char buf[256];
	// Number of bytes in buf that have not been consumed yet
	unsigned int rem;
	// Position in buf of the next event to be consumed
	void *next;
	// Path produced by mkdtemp() from root_mntpoint_templ
	char root_mntpoint[sizeof(root_mntpoint_templ)];
	// O_PATH fd of "/" opened before the chroot; used to get back out
	int orig_root;
	// /proc/self/ns/mnt opened before setup_userns()
	int orig_ns_fd;
	// /proc/self/ns/mnt opened after setup_userns()
	int ns_fd;
	// get_unique_mnt_id("/") taken after chrooting into the tmpfs
	uint64_t root_id;
};
55 
FIXTURE_SETUP(fanotify)56 FIXTURE_SETUP(fanotify)
57 {
58 	int i, ret;
59 
60 	self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
61 	ASSERT_GE(self->orig_ns_fd, 0);
62 
63 	ret = setup_userns();
64 	ASSERT_EQ(ret, 0);
65 
66 	self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
67 	ASSERT_GE(self->ns_fd, 0);
68 
69 	strcpy(self->root_mntpoint, root_mntpoint_templ);
70 	ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
71 
72 	self->orig_root = open("/", O_PATH | O_CLOEXEC);
73 	ASSERT_GE(self->orig_root, 0);
74 
75 	ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
76 
77 	ASSERT_EQ(chroot(self->root_mntpoint), 0);
78 
79 	ASSERT_EQ(chdir("/"), 0);
80 
81 	ASSERT_EQ(mkdir("a", 0700), 0);
82 
83 	ASSERT_EQ(mkdir("b", 0700), 0);
84 
85 	self->root_id = get_unique_mnt_id("/");
86 	ASSERT_NE(self->root_id, 0);
87 
88 	for (i = 0; i < NUM_FAN_FDS; i++) {
89 		int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
90 		// Verify that watching tmpfs mounted inside userns is allowed
91 		ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
92 				    FAN_OPEN, AT_FDCWD, "/");
93 		ASSERT_EQ(ret, 0);
94 		// ...but watching entire orig root filesystem is not allowed
95 		ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
96 				    FAN_OPEN, self->orig_root, ".");
97 		ASSERT_NE(ret, 0);
98 		close(fan_fd);
99 
100 		self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
101 						0);
102 		ASSERT_GE(self->fan_fd[i], 0);
103 		// Verify that watching mntns where group was created is allowed
104 		ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
105 				    FAN_MARK_MNTNS,
106 				    FAN_MNT_ATTACH | FAN_MNT_DETACH,
107 				    self->ns_fd, NULL);
108 		ASSERT_EQ(ret, 0);
109 		// ...but watching orig mntns is not allowed
110 		ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
111 				    FAN_MARK_MNTNS,
112 				    FAN_MNT_ATTACH | FAN_MNT_DETACH,
113 				    self->orig_ns_fd, NULL);
114 		ASSERT_NE(ret, 0);
115 		// On fd[0] we do an extra ADD that changes nothing.
116 		// On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
117 		ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
118 				    FAN_MARK_MNTNS,
119 				    FAN_MNT_ATTACH | FAN_MNT_DETACH,
120 				    self->ns_fd, NULL);
121 		ASSERT_EQ(ret, 0);
122 	}
123 
124 	self->rem = 0;
125 }
126 
FIXTURE_TEARDOWN(fanotify)127 FIXTURE_TEARDOWN(fanotify)
128 {
129 	int i;
130 
131 	ASSERT_EQ(self->rem, 0);
132 	for (i = 0; i < NUM_FAN_FDS; i++)
133 		close(self->fan_fd[i]);
134 
135 	ASSERT_EQ(fchdir(self->orig_root), 0);
136 
137 	ASSERT_EQ(chroot("."), 0);
138 
139 	EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
140 	EXPECT_EQ(chdir(self->root_mntpoint), 0);
141 	EXPECT_EQ(chdir("/"), 0);
142 	EXPECT_EQ(rmdir(self->root_mntpoint), 0);
143 }
144 
// Consume one mount notification and return the mount ID it carries.
//
// When the fixture buffer is empty it is refilled from group 0; at the same
// time groups 1..N-1 (whose mark was removed or flushed in setup) are checked
// to have nothing queued. The event's mask is stored in *mask.
static uint64_t expect_notify(struct __test_metadata *const _metadata,
			      FIXTURE_DATA(fanotify) *self,
			      uint64_t *mask)
{
	struct fanotify_event_metadata *meta;
	struct fanotify_event_info_mnt *mnt;
	unsigned int thislen;

	if (self->rem == 0) {
		ssize_t len;
		int i;

		// Groups 1,2 should get EAGAIN (checked highest index first)
		for (i = NUM_FAN_FDS - 1; i > 0; i--) {
			len = read(self->fan_fd[i], self->buf,
				   sizeof(self->buf));
			ASSERT_EQ(len, -1);
			ASSERT_EQ(errno, EAGAIN);
		}

		// Group 0 should get events
		len = read(self->fan_fd[0], self->buf, sizeof(self->buf));
		ASSERT_GT(len, 0);

		self->next = self->buf;
		self->rem = len;
	}

	meta = self->next;
	ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));

	// Step the cursor past this event before looking at its payload
	self->rem -= meta->event_len;
	self->next += meta->event_len;

	*mask = meta->mask;

	// Whatever follows the fixed-size metadata must be exactly one
	// mount info record
	thislen = meta->event_len - sizeof(*meta);
	mnt = (void *) (meta + 1);

	ASSERT_EQ(thislen, sizeof(*mnt));

	return mnt->mnt_id;
}
190 
// Consume n notifications in arrival order, storing the mask of the k-th one
// in mask[k] and its mount ID in mnts[k].
static void expect_notify_n(struct __test_metadata *const _metadata,
				 FIXTURE_DATA(fanotify) *self,
				 unsigned int n, uint64_t mask[], uint64_t mnts[])
{
	// Walk both output arrays in lock-step, one event per slot
	for (; n > 0; n--, mask++, mnts++)
		*mnts = expect_notify(_metadata, self, mask);
}
200 
// Consume one notification, require its mask to equal expect_mask and return
// the mount ID it reports.
static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
				   FIXTURE_DATA(fanotify) *self,
				   uint64_t expect_mask)
{
	uint64_t mask;
	const uint64_t mntid = expect_notify(_metadata, self, &mask);

	ASSERT_EQ(expect_mask, mask);

	return mntid;
}
212 
213 
// Consume n notifications that must all carry exactly the given mask; the
// mount ID of the k-th one is stored in mnts[k].
static void expect_notify_mask_n(struct __test_metadata *const _metadata,
				 FIXTURE_DATA(fanotify) *self,
				 uint64_t mask, unsigned int n, uint64_t mnts[])
{
	uint64_t *slot = mnts;

	while (n > 0) {
		*slot = expect_notify_mask(_metadata, self, mask);
		slot++;
		n--;
	}
}
223 
// Assert that list1 and list2, both of length num, hold the same set of mount
// IDs, in any order.
static void verify_mount_ids(struct __test_metadata *const _metadata,
			     const uint64_t list1[], const uint64_t list2[],
			     size_t num)
{
	unsigned int i, j;

	// Uniqueness: no two distinct positions of either list may hold the
	// same ID
	for (i = 0; i < num; i++) {
		for (j = 0; j < num; j++) {
			if (i == j)
				continue;
			ASSERT_NE(list1[i], list1[j]);
			ASSERT_NE(list2[i], list2[j]);
		}
	}

	// Containment: every list1 member must occur somewhere in list2.
	// With equal lengths and no duplicates this makes the two sets equal.
	for (i = 0; i < num; i++) {
		j = 0;
		while (j < num && list1[i] != list2[j])
			j++;
		// j == num means the scan ran off the end without a match
		ASSERT_NE(j, num);
	}
}
249 
check_mounted(struct __test_metadata * const _metadata,const uint64_t mnts[],size_t num)250 static void check_mounted(struct __test_metadata *const _metadata,
251 			  const uint64_t mnts[], size_t num)
252 {
253 	ssize_t ret;
254 	uint64_t *list;
255 
256 	list = malloc((num + 1) * sizeof(list[0]));
257 	ASSERT_NE(list, NULL);
258 
259 	ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
260 	ASSERT_EQ(ret, num);
261 
262 	verify_mount_ids(_metadata, mnts, list, num);
263 
264 	free(list);
265 }
266 
setup_mount_tree(struct __test_metadata * const _metadata,int log2_num)267 static void setup_mount_tree(struct __test_metadata *const _metadata,
268 			    int log2_num)
269 {
270 	int ret, i;
271 
272 	ret = mount("", "/", NULL, MS_SHARED, NULL);
273 	ASSERT_EQ(ret, 0);
274 
275 	for (i = 0; i < log2_num; i++) {
276 		ret = mount("/", "/", NULL, MS_BIND, NULL);
277 		ASSERT_EQ(ret, 0);
278 	}
279 }
280 
TEST_F(fanotify,bind)281 TEST_F(fanotify, bind)
282 {
283 	int ret;
284 	uint64_t mnts[2] = { self->root_id };
285 
286 	ret = mount("/", "/", NULL, MS_BIND, NULL);
287 	ASSERT_EQ(ret, 0);
288 
289 	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
290 	ASSERT_NE(mnts[0], mnts[1]);
291 
292 	check_mounted(_metadata, mnts, 2);
293 
294 	// Cleanup
295 	uint64_t detach_id;
296 	ret = umount("/");
297 	ASSERT_EQ(ret, 0);
298 
299 	detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
300 	ASSERT_EQ(detach_id, mnts[1]);
301 
302 	check_mounted(_metadata, mnts, 1);
303 }
304 
TEST_F(fanotify,move)305 TEST_F(fanotify, move)
306 {
307 	int ret;
308 	uint64_t mnts[2] = { self->root_id };
309 	uint64_t move_id;
310 
311 	ret = mount("/", "/a", NULL, MS_BIND, NULL);
312 	ASSERT_EQ(ret, 0);
313 
314 	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
315 	ASSERT_NE(mnts[0], mnts[1]);
316 
317 	check_mounted(_metadata, mnts, 2);
318 
319 	ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
320 	ASSERT_EQ(ret, 0);
321 
322 	move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
323 	ASSERT_EQ(move_id, mnts[1]);
324 
325 	// Cleanup
326 	ret = umount("/b");
327 	ASSERT_EQ(ret, 0);
328 
329 	check_mounted(_metadata, mnts, 1);
330 }
331 
TEST_F(fanotify,propagate)332 TEST_F(fanotify, propagate)
333 {
334 	const unsigned int log2_num = 4;
335 	const unsigned int num = (1 << log2_num);
336 	uint64_t mnts[num];
337 
338 	setup_mount_tree(_metadata, log2_num);
339 
340 	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
341 
342 	mnts[0] = self->root_id;
343 	check_mounted(_metadata, mnts, num);
344 
345 	// Cleanup
346 	int ret;
347 	uint64_t mnts2[num];
348 	ret = umount2("/", MNT_DETACH);
349 	ASSERT_EQ(ret, 0);
350 
351 	ret = mount("", "/", NULL, MS_PRIVATE, NULL);
352 	ASSERT_EQ(ret, 0);
353 
354 	mnts2[0] = self->root_id;
355 	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
356 	verify_mount_ids(_metadata, mnts, mnts2, num);
357 
358 	check_mounted(_metadata, mnts, 1);
359 }
360 
TEST_F(fanotify,fsmount)361 TEST_F(fanotify, fsmount)
362 {
363 	int ret, fs, mnt;
364 	uint64_t mnts[2] = { self->root_id };
365 
366 	fs = fsopen("tmpfs", 0);
367 	ASSERT_GE(fs, 0);
368 
369 	ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
370 	ASSERT_EQ(ret, 0);
371 
372 	mnt = fsmount(fs, 0, 0);
373 	ASSERT_GE(mnt, 0);
374 
375 	close(fs);
376 
377 	ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
378 	ASSERT_EQ(ret, 0);
379 
380 	close(mnt);
381 
382 	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
383 	ASSERT_NE(mnts[0], mnts[1]);
384 
385 	check_mounted(_metadata, mnts, 2);
386 
387 	// Cleanup
388 	uint64_t detach_id;
389 	ret = umount("/a");
390 	ASSERT_EQ(ret, 0);
391 
392 	detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
393 	ASSERT_EQ(detach_id, mnts[1]);
394 
395 	check_mounted(_metadata, mnts, 1);
396 }
397 
// Check the events generated when unmounting causes another mount to be
// reparented: the reparented mount must be reported with ATTACH | DETACH,
// the unmounted ones with DETACH only.
//
// mnts[] collects mount IDs in the order they are created (mnts[0] is the
// root); the bracketed numbers in the comments below are indices into it.
TEST_F(fanotify, reparent)
{
	uint64_t mnts[6] = { self->root_id };
	uint64_t dmnts[3];
	uint64_t masks[3];
	unsigned int i;
	int ret;

	// Create setup with a[1] -> b[2] propagation
	ret = mount("/", "/a", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	ret = mount("", "/a", NULL, MS_SHARED, NULL);
	ASSERT_EQ(ret, 0);

	ret = mount("/a", "/b", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	ret = mount("", "/b", NULL, MS_SLAVE, NULL);
	ASSERT_EQ(ret, 0);

	// Only the two bind mounts are expected to generate events
	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);

	check_mounted(_metadata, mnts, 3);

	// Mount on a[3], which is propagated to b[4]
	ret = mount("/", "/a", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);

	check_mounted(_metadata, mnts, 5);

	// Mount on b[5], not propagated
	ret = mount("/", "/b", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);

	check_mounted(_metadata, mnts, 6);

	// Umount a[3], which is propagated to b[4], but not b[5]
	// This will result in b[5] "falling" on b[2]
	ret = umount("/a");
	ASSERT_EQ(ret, 0);

	// Three events in unspecified order, covering exactly mnts[3..5]
	expect_notify_n(_metadata, self, 3, masks, dmnts);
	verify_mount_ids(_metadata, mnts + 3, dmnts, 3);

	for (i = 0; i < 3; i++) {
		if (dmnts[i] == mnts[5]) {
			// The reparented mount is detached and re-attached
			ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
		} else {
			ASSERT_EQ(masks[i], FAN_MNT_DETACH);
		}
	}

	// mnts[3] and mnts[4] are gone; move the survivor into slot 3 so that
	// mnts[0..3] lists the mounts that remain
	mnts[3] = mnts[5];
	check_mounted(_metadata, mnts, 4);

	// Cleanup
	ret = umount("/b");
	ASSERT_EQ(ret, 0);

	ret = umount("/a");
	ASSERT_EQ(ret, 0);

	ret = umount("/b");
	ASSERT_EQ(ret, 0);

	// The three remaining non-root mounts must each report a DETACH
	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
	verify_mount_ids(_metadata, mnts + 1, dmnts, 3);

	check_mounted(_metadata, mnts, 1);
}
473 
TEST_F(fanotify,rmdir)474 TEST_F(fanotify, rmdir)
475 {
476 	uint64_t mnts[3] = { self->root_id };
477 	int ret;
478 
479 	ret = mount("/", "/a", NULL, MS_BIND, NULL);
480 	ASSERT_EQ(ret, 0);
481 
482 	ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
483 	ASSERT_EQ(ret, 0);
484 
485 	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
486 
487 	check_mounted(_metadata, mnts, 3);
488 
489 	ret = chdir("/a");
490 	ASSERT_EQ(ret, 0);
491 
492 	ret = fork();
493 	ASSERT_GE(ret, 0);
494 
495 	if (ret == 0) {
496 		chdir("/");
497 		unshare(CLONE_NEWNS);
498 		mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
499 		umount2("/a", MNT_DETACH);
500 		// This triggers a detach in the other namespace
501 		rmdir("/a");
502 		exit(0);
503 	}
504 	wait(NULL);
505 
506 	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
507 	check_mounted(_metadata, mnts, 1);
508 
509 	// Cleanup
510 	ret = chdir("/");
511 	ASSERT_EQ(ret, 0);
512 }
513 
// pivot_root() must report two events, each carrying ATTACH | DETACH, and the
// reported mounts must be exactly mnts[0] (the root) and mnts[1] (the bind
// mount that becomes the new root).
TEST_F(fanotify, pivot_root)
{
	uint64_t mnts[3] = { self->root_id };
	uint64_t mnts2[3];
	int ret;

	ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
	ASSERT_EQ(ret, 0);

	// Stored in the last slot so that mnts[0..1] ends up holding the two
	// mounts verified after pivot_root()
	mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);

	ret = mkdir("/a/new", 0700);
	ASSERT_EQ(ret, 0);

	ret = mkdir("/a/old", 0700);
	ASSERT_EQ(ret, 0);

	ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
	ASSERT_EQ(ret, 0);

	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
	check_mounted(_metadata, mnts, 3);

	// Arguments are new_root and put_old
	ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
	ASSERT_EQ(ret, 0);

	// Only the first two entries of mnts and mnts2 are compared; the set
	// of mounts in the namespace is unchanged
	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
	verify_mount_ids(_metadata, mnts, mnts2, 2);
	check_mounted(_metadata, mnts, 3);

	// Cleanup
	// Pivot back to the previous root; the events generated from here on
	// are not read by this test
	ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
	ASSERT_EQ(ret, 0);

	ret = umount("/a/new");
	ASSERT_EQ(ret, 0);

	ret = umount("/a");
	ASSERT_EQ(ret, 0);

	check_mounted(_metadata, mnts, 1);
}
556 
557 TEST_HARNESS_MAIN
558