1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21
22 #include "../kselftest_harness.h"
23
24 #ifndef CLONE_NEWNS
25 #define CLONE_NEWNS 0x00020000
26 #endif
27
28 #ifndef CLONE_NEWUSER
29 #define CLONE_NEWUSER 0x10000000
30 #endif
31
32 #ifndef MS_REC
33 #define MS_REC 16384
34 #endif
35
36 #ifndef MS_RELATIME
37 #define MS_RELATIME (1 << 21)
38 #endif
39
40 #ifndef MS_STRICTATIME
41 #define MS_STRICTATIME (1 << 24)
42 #endif
43
44 #ifndef MOUNT_ATTR_RDONLY
45 #define MOUNT_ATTR_RDONLY 0x00000001
46 #endif
47
48 #ifndef MOUNT_ATTR_NOSUID
49 #define MOUNT_ATTR_NOSUID 0x00000002
50 #endif
51
52 #ifndef MOUNT_ATTR_NOEXEC
53 #define MOUNT_ATTR_NOEXEC 0x00000008
54 #endif
55
56 #ifndef MOUNT_ATTR_NODIRATIME
57 #define MOUNT_ATTR_NODIRATIME 0x00000080
58 #endif
59
60 #ifndef MOUNT_ATTR__ATIME
61 #define MOUNT_ATTR__ATIME 0x00000070
62 #endif
63
64 #ifndef MOUNT_ATTR_RELATIME
65 #define MOUNT_ATTR_RELATIME 0x00000000
66 #endif
67
68 #ifndef MOUNT_ATTR_NOATIME
69 #define MOUNT_ATTR_NOATIME 0x00000010
70 #endif
71
72 #ifndef MOUNT_ATTR_STRICTATIME
73 #define MOUNT_ATTR_STRICTATIME 0x00000020
74 #endif
75
76 #ifndef AT_RECURSIVE
77 #define AT_RECURSIVE 0x8000
78 #endif
79
80 #ifndef MS_SHARED
81 #define MS_SHARED (1 << 20)
82 #endif
83
84 #define DEFAULT_THREADS 4
85 #define ptr_to_int(p) ((int)((intptr_t)(p)))
86 #define int_to_ptr(u) ((void *)((intptr_t)(u)))
87
88 #ifndef __NR_mount_setattr
89 #if defined __alpha__
90 #define __NR_mount_setattr 552
91 #elif defined _MIPS_SIM
92 #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
93 #define __NR_mount_setattr (442 + 4000)
94 #endif
95 #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
96 #define __NR_mount_setattr (442 + 6000)
97 #endif
98 #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
99 #define __NR_mount_setattr (442 + 5000)
100 #endif
101 #elif defined __ia64__
102 #define __NR_mount_setattr (442 + 1024)
103 #else
104 #define __NR_mount_setattr 442
105 #endif
106 #endif
107
108 #ifndef __NR_open_tree
109 #if defined __alpha__
110 #define __NR_open_tree 538
111 #elif defined _MIPS_SIM
112 #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
113 #define __NR_open_tree 4428
114 #endif
115 #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
116 #define __NR_open_tree 6428
117 #endif
118 #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
119 #define __NR_open_tree 5428
120 #endif
121 #elif defined __ia64__
122 #define __NR_open_tree (428 + 1024)
123 #else
124 #define __NR_open_tree 428
125 #endif
126 #endif
127
128 #ifndef MOUNT_ATTR_IDMAP
129 #define MOUNT_ATTR_IDMAP 0x00100000
130 #endif
131
132 #ifndef MOUNT_ATTR_NOSYMFOLLOW
133 #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
134 #endif
135
/*
 * Thin wrapper that issues the raw mount_setattr system call.
 *
 * All five arguments are handed to syscall() unchanged; the long result
 * of syscall() is narrowed to int for the caller.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return (int)rc;
}
141
142 #ifndef OPEN_TREE_CLONE
143 #define OPEN_TREE_CLONE 1
144 #endif
145
146 #ifndef OPEN_TREE_CLOEXEC
147 #define OPEN_TREE_CLOEXEC O_CLOEXEC
148 #endif
149
150 #ifndef AT_RECURSIVE
151 #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
152 #endif
153
/*
 * Thin wrapper that issues the raw open_tree system call.
 *
 * The three arguments are handed to syscall() unchanged; the long result
 * of syscall() is narrowed to int for the caller.
 */
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc = syscall(__NR_open_tree, dfd, filename, flags);

	return (int)rc;
}
158
/*
 * write() wrapper that restarts the call whenever it fails with EINTR.
 *
 * Returns the result of the last write(): the number of bytes written
 * (possibly fewer than count), or a negative value for any failure other
 * than EINTR.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	ssize_t nwritten;

	for (;;) {
		nwritten = write(fd, buf, count);
		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
169
write_file(const char * path,const void * buf,size_t count)170 static int write_file(const char *path, const void *buf, size_t count)
171 {
172 int fd;
173 ssize_t ret;
174
175 fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
176 if (fd < 0)
177 return -1;
178
179 ret = write_nointr(fd, buf, count);
180 close(fd);
181 if (ret < 0 || (size_t)ret != count)
182 return -1;
183
184 return 0;
185 }
186
create_and_enter_userns(void)187 static int create_and_enter_userns(void)
188 {
189 uid_t uid;
190 gid_t gid;
191 char map[100];
192
193 uid = getuid();
194 gid = getgid();
195
196 if (unshare(CLONE_NEWUSER))
197 return -1;
198
199 if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
200 errno != ENOENT)
201 return -1;
202
203 snprintf(map, sizeof(map), "0 %d 1", uid);
204 if (write_file("/proc/self/uid_map", map, strlen(map)))
205 return -1;
206
207
208 snprintf(map, sizeof(map), "0 %d 1", gid);
209 if (write_file("/proc/self/gid_map", map, strlen(map)))
210 return -1;
211
212 if (setgid(0))
213 return -1;
214
215 if (setuid(0))
216 return -1;
217
218 return 0;
219 }
220
prepare_unpriv_mountns(void)221 static int prepare_unpriv_mountns(void)
222 {
223 if (create_and_enter_userns())
224 return -1;
225
226 if (unshare(CLONE_NEWNS))
227 return -1;
228
229 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
230 return -1;
231
232 return 0;
233 }
234
235 #ifndef ST_NOSYMFOLLOW
236 #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
237 #endif
238
read_mnt_flags(const char * path)239 static int read_mnt_flags(const char *path)
240 {
241 int ret;
242 struct statvfs stat;
243 unsigned int mnt_flags;
244
245 ret = statvfs(path, &stat);
246 if (ret != 0)
247 return -EINVAL;
248
249 if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
250 ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
251 ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
252 return -EINVAL;
253
254 mnt_flags = 0;
255 if (stat.f_flag & ST_RDONLY)
256 mnt_flags |= MS_RDONLY;
257 if (stat.f_flag & ST_NOSUID)
258 mnt_flags |= MS_NOSUID;
259 if (stat.f_flag & ST_NODEV)
260 mnt_flags |= MS_NODEV;
261 if (stat.f_flag & ST_NOEXEC)
262 mnt_flags |= MS_NOEXEC;
263 if (stat.f_flag & ST_NOATIME)
264 mnt_flags |= MS_NOATIME;
265 if (stat.f_flag & ST_NODIRATIME)
266 mnt_flags |= MS_NODIRATIME;
267 if (stat.f_flag & ST_RELATIME)
268 mnt_flags |= MS_RELATIME;
269 if (stat.f_flag & ST_SYNCHRONOUS)
270 mnt_flags |= MS_SYNCHRONOUS;
271 if (stat.f_flag & ST_MANDLOCK)
272 mnt_flags |= ST_MANDLOCK;
273 if (stat.f_flag & ST_NOSYMFOLLOW)
274 mnt_flags |= ST_NOSYMFOLLOW;
275
276 return mnt_flags;
277 }
278
/*
 * Skip nfields separator-terminated fields in src.
 *
 * A field ends at the first space or tab; exactly one separator character
 * is consumed per field, so consecutive separators yield empty fields.
 *
 * Returns a pointer into src just past the nfields-th separator, or a
 * pointer to the terminating NUL if src runs out of fields first. The
 * result is never NULL and src is not modified.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int skipped = 0;

	while (skipped < nfields) {
		/* Jump to the separator (or NUL) that ends this field. */
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;

		/* Step over the single separator character. */
		cur++;
		skipped++;
	}

	return cur;
}
296
/*
 * Truncate word in place at its first space or tab.
 *
 * If word contains neither, the NUL terminator is rewritten with NUL and
 * the string is left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
303
/*
 * Report whether the mount whose mount point equals path is listed in
 * /proc/self/mountinfo with a "shared:" tag.
 *
 * For each line, the word found after skipping four fields is compared
 * with path; on a match, the word two fields further on is searched for
 * "shared:". Only that single word is inspected.
 *
 * Returns true when a matching line carries the tag, false otherwise,
 * including when mountinfo cannot be opened.
 */
static bool is_shared_mount(const char *path)
{
	size_t len = 0;
	char *line = NULL;
	bool shared = false;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/*
		 * get_field() never returns NULL: on a short line it returns
		 * a pointer to the terminating NUL, which simply fails the
		 * comparisons below.
		 */
		target = get_field(line, 4);

		/* Locate opts before target is cut off from the rest of the line. */
		opts = get_field(target, 2);

		null_endofword(target);
		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/* Leave through the common exit so line and f are released. */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
340
mount_setattr_thread(void * data)341 static void *mount_setattr_thread(void *data)
342 {
343 struct mount_attr attr = {
344 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
345 .attr_clr = 0,
346 .propagation = MS_SHARED,
347 };
348
349 if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
350 pthread_exit(int_to_ptr(-1));
351
352 pthread_exit(int_to_ptr(0));
353 }
354
355 /* Attempt to de-conflict with the selftests tree. */
356 #ifndef SKIP
357 #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
358 #endif
359
mount_setattr_supported(void)360 static bool mount_setattr_supported(void)
361 {
362 int ret;
363
364 ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
365 if (ret < 0 && errno == ENOSYS)
366 return false;
367
368 return true;
369 }
370
/* Fixture for the mount_setattr tests; it carries no per-test data. */
FIXTURE(mount_setattr)
{
};
373
374 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
375 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
376
FIXTURE_SETUP(mount_setattr)377 FIXTURE_SETUP(mount_setattr)
378 {
379 int fd = -EBADF;
380
381 if (!mount_setattr_supported())
382 SKIP(return, "mount_setattr syscall not supported");
383
384 ASSERT_EQ(prepare_unpriv_mountns(), 0);
385
386 (void)umount2("/mnt", MNT_DETACH);
387 (void)umount2("/tmp", MNT_DETACH);
388
389 ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
390 "size=100000,mode=700"), 0);
391
392 ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
393
394 ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
395 "size=100000,mode=700"), 0);
396
397 ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
398
399 ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
400 "size=100000,mode=700"), 0);
401
402 ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
403 "size=100000,mode=700"), 0);
404
405 ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
406
407 ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
408 "size=100000,mode=700"), 0);
409
410 ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
411
412 ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
413
414 ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
415
416 ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
417 MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
418
419 ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
420
421 ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
422 MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
423
424 fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
425 ASSERT_GT(fd, 0);
426 ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
427 ASSERT_EQ(close(fd), 0);
428 }
429
FIXTURE_TEARDOWN(mount_setattr)430 FIXTURE_TEARDOWN(mount_setattr)
431 {
432 if (!mount_setattr_supported())
433 SKIP(return, "mount_setattr syscall not supported");
434
435 (void)umount2("/mnt/A", MNT_DETACH);
436 (void)umount2("/tmp", MNT_DETACH);
437 }
438
TEST_F(mount_setattr,invalid_attributes)439 TEST_F(mount_setattr, invalid_attributes)
440 {
441 struct mount_attr invalid_attr = {
442 .attr_set = (1U << 31),
443 };
444
445 if (!mount_setattr_supported())
446 SKIP(return, "mount_setattr syscall not supported");
447
448 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
449 sizeof(invalid_attr)), 0);
450
451 invalid_attr.attr_set = 0;
452 invalid_attr.attr_clr = (1U << 31);
453 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
454 sizeof(invalid_attr)), 0);
455
456 invalid_attr.attr_clr = 0;
457 invalid_attr.propagation = (1U << 31);
458 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
459 sizeof(invalid_attr)), 0);
460
461 invalid_attr.attr_set = (1U << 31);
462 invalid_attr.attr_clr = (1U << 31);
463 invalid_attr.propagation = (1U << 31);
464 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
465 sizeof(invalid_attr)), 0);
466
467 ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
468 sizeof(invalid_attr)), 0);
469 }
470
TEST_F(mount_setattr,extensibility)471 TEST_F(mount_setattr, extensibility)
472 {
473 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
474 char *s = "dummy";
475 struct mount_attr invalid_attr = {};
476 struct mount_attr_large {
477 struct mount_attr attr1;
478 struct mount_attr attr2;
479 struct mount_attr attr3;
480 } large_attr = {};
481
482 if (!mount_setattr_supported())
483 SKIP(return, "mount_setattr syscall not supported");
484
485 old_flags = read_mnt_flags("/mnt/A");
486 ASSERT_GT(old_flags, 0);
487
488 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
489 sizeof(invalid_attr)), 0);
490 ASSERT_EQ(errno, EFAULT);
491
492 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
493 sizeof(invalid_attr)), 0);
494 ASSERT_EQ(errno, EINVAL);
495
496 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
497 ASSERT_EQ(errno, EINVAL);
498
499 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
500 sizeof(invalid_attr) / 2), 0);
501 ASSERT_EQ(errno, EINVAL);
502
503 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
504 sizeof(invalid_attr) / 2), 0);
505 ASSERT_EQ(errno, EINVAL);
506
507 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
508 (void *)&large_attr, sizeof(large_attr)), 0);
509
510 large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
511 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
512 (void *)&large_attr, sizeof(large_attr)), 0);
513
514 large_attr.attr3.attr_set = 0;
515 large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
516 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
517 (void *)&large_attr, sizeof(large_attr)), 0);
518
519 expected_flags = old_flags;
520 expected_flags |= MS_RDONLY;
521
522 new_flags = read_mnt_flags("/mnt/A");
523 ASSERT_EQ(new_flags, expected_flags);
524
525 new_flags = read_mnt_flags("/mnt/A/AA");
526 ASSERT_EQ(new_flags, expected_flags);
527
528 new_flags = read_mnt_flags("/mnt/A/AA/B");
529 ASSERT_EQ(new_flags, expected_flags);
530
531 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
532 ASSERT_EQ(new_flags, expected_flags);
533 }
534
TEST_F(mount_setattr,basic)535 TEST_F(mount_setattr, basic)
536 {
537 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
538 struct mount_attr attr = {
539 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
540 .attr_clr = MOUNT_ATTR__ATIME,
541 };
542
543 if (!mount_setattr_supported())
544 SKIP(return, "mount_setattr syscall not supported");
545
546 old_flags = read_mnt_flags("/mnt/A");
547 ASSERT_GT(old_flags, 0);
548
549 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
550
551 expected_flags = old_flags;
552 expected_flags |= MS_RDONLY;
553 expected_flags |= MS_NOEXEC;
554 expected_flags &= ~MS_NOATIME;
555 expected_flags |= MS_RELATIME;
556
557 new_flags = read_mnt_flags("/mnt/A");
558 ASSERT_EQ(new_flags, expected_flags);
559
560 new_flags = read_mnt_flags("/mnt/A/AA");
561 ASSERT_EQ(new_flags, old_flags);
562
563 new_flags = read_mnt_flags("/mnt/A/AA/B");
564 ASSERT_EQ(new_flags, old_flags);
565
566 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
567 ASSERT_EQ(new_flags, old_flags);
568 }
569
TEST_F(mount_setattr,basic_recursive)570 TEST_F(mount_setattr, basic_recursive)
571 {
572 int fd;
573 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
574 struct mount_attr attr = {
575 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
576 .attr_clr = MOUNT_ATTR__ATIME,
577 };
578
579 if (!mount_setattr_supported())
580 SKIP(return, "mount_setattr syscall not supported");
581
582 old_flags = read_mnt_flags("/mnt/A");
583 ASSERT_GT(old_flags, 0);
584
585 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
586
587 expected_flags = old_flags;
588 expected_flags |= MS_RDONLY;
589 expected_flags |= MS_NOEXEC;
590 expected_flags &= ~MS_NOATIME;
591 expected_flags |= MS_RELATIME;
592
593 new_flags = read_mnt_flags("/mnt/A");
594 ASSERT_EQ(new_flags, expected_flags);
595
596 new_flags = read_mnt_flags("/mnt/A/AA");
597 ASSERT_EQ(new_flags, expected_flags);
598
599 new_flags = read_mnt_flags("/mnt/A/AA/B");
600 ASSERT_EQ(new_flags, expected_flags);
601
602 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
603 ASSERT_EQ(new_flags, expected_flags);
604
605 memset(&attr, 0, sizeof(attr));
606 attr.attr_clr = MOUNT_ATTR_RDONLY;
607 attr.propagation = MS_SHARED;
608 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
609
610 expected_flags &= ~MS_RDONLY;
611 new_flags = read_mnt_flags("/mnt/A");
612 ASSERT_EQ(new_flags, expected_flags);
613
614 ASSERT_EQ(is_shared_mount("/mnt/A"), true);
615
616 new_flags = read_mnt_flags("/mnt/A/AA");
617 ASSERT_EQ(new_flags, expected_flags);
618
619 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
620
621 new_flags = read_mnt_flags("/mnt/A/AA/B");
622 ASSERT_EQ(new_flags, expected_flags);
623
624 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
625
626 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
627 ASSERT_EQ(new_flags, expected_flags);
628
629 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
630
631 fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
632 ASSERT_GE(fd, 0);
633
634 /*
635 * We're holding a fd open for writing so this needs to fail somewhere
636 * in the middle and the mount options need to be unchanged.
637 */
638 attr.attr_set = MOUNT_ATTR_RDONLY;
639 ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
640
641 new_flags = read_mnt_flags("/mnt/A");
642 ASSERT_EQ(new_flags, expected_flags);
643
644 ASSERT_EQ(is_shared_mount("/mnt/A"), true);
645
646 new_flags = read_mnt_flags("/mnt/A/AA");
647 ASSERT_EQ(new_flags, expected_flags);
648
649 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
650
651 new_flags = read_mnt_flags("/mnt/A/AA/B");
652 ASSERT_EQ(new_flags, expected_flags);
653
654 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
655
656 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
657 ASSERT_EQ(new_flags, expected_flags);
658
659 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
660
661 EXPECT_EQ(close(fd), 0);
662 }
663
TEST_F(mount_setattr,mount_has_writers)664 TEST_F(mount_setattr, mount_has_writers)
665 {
666 int fd, dfd;
667 unsigned int old_flags = 0, new_flags = 0;
668 struct mount_attr attr = {
669 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
670 .attr_clr = MOUNT_ATTR__ATIME,
671 .propagation = MS_SHARED,
672 };
673
674 if (!mount_setattr_supported())
675 SKIP(return, "mount_setattr syscall not supported");
676
677 old_flags = read_mnt_flags("/mnt/A");
678 ASSERT_GT(old_flags, 0);
679
680 fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
681 ASSERT_GE(fd, 0);
682
683 /*
684 * We're holding a fd open to a mount somwhere in the middle so this
685 * needs to fail somewhere in the middle. After this the mount options
686 * need to be unchanged.
687 */
688 ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
689
690 new_flags = read_mnt_flags("/mnt/A");
691 ASSERT_EQ(new_flags, old_flags);
692
693 ASSERT_EQ(is_shared_mount("/mnt/A"), false);
694
695 new_flags = read_mnt_flags("/mnt/A/AA");
696 ASSERT_EQ(new_flags, old_flags);
697
698 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
699
700 new_flags = read_mnt_flags("/mnt/A/AA/B");
701 ASSERT_EQ(new_flags, old_flags);
702
703 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
704
705 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
706 ASSERT_EQ(new_flags, old_flags);
707
708 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
709
710 dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
711 ASSERT_GE(dfd, 0);
712 EXPECT_EQ(fsync(dfd), 0);
713 EXPECT_EQ(close(dfd), 0);
714
715 EXPECT_EQ(fsync(fd), 0);
716 EXPECT_EQ(close(fd), 0);
717
718 /* All writers are gone so this should succeed. */
719 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
720 }
721
TEST_F(mount_setattr,mixed_mount_options)722 TEST_F(mount_setattr, mixed_mount_options)
723 {
724 unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
725 struct mount_attr attr = {
726 .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
727 .attr_set = MOUNT_ATTR_RELATIME,
728 };
729
730 if (!mount_setattr_supported())
731 SKIP(return, "mount_setattr syscall not supported");
732
733 old_flags1 = read_mnt_flags("/mnt/B");
734 ASSERT_GT(old_flags1, 0);
735
736 old_flags2 = read_mnt_flags("/mnt/B/BB");
737 ASSERT_GT(old_flags2, 0);
738
739 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
740
741 expected_flags = old_flags2;
742 expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
743 expected_flags |= MS_RELATIME;
744
745 new_flags = read_mnt_flags("/mnt/B");
746 ASSERT_EQ(new_flags, expected_flags);
747
748 expected_flags = old_flags2;
749 expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
750 expected_flags |= MS_RELATIME;
751
752 new_flags = read_mnt_flags("/mnt/B/BB");
753 ASSERT_EQ(new_flags, expected_flags);
754 }
755
TEST_F(mount_setattr,time_changes)756 TEST_F(mount_setattr, time_changes)
757 {
758 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
759 struct mount_attr attr = {
760 .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
761 };
762
763 if (!mount_setattr_supported())
764 SKIP(return, "mount_setattr syscall not supported");
765
766 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
767
768 attr.attr_set = MOUNT_ATTR_STRICTATIME;
769 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
770
771 attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
772 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
773
774 attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
775 attr.attr_clr = MOUNT_ATTR__ATIME;
776 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
777
778 attr.attr_set = 0;
779 attr.attr_clr = MOUNT_ATTR_STRICTATIME;
780 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
781
782 attr.attr_clr = MOUNT_ATTR_NOATIME;
783 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
784
785 old_flags = read_mnt_flags("/mnt/A");
786 ASSERT_GT(old_flags, 0);
787
788 attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
789 attr.attr_clr = MOUNT_ATTR__ATIME;
790 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
791
792 expected_flags = old_flags;
793 expected_flags |= MS_NOATIME;
794 expected_flags |= MS_NODIRATIME;
795
796 new_flags = read_mnt_flags("/mnt/A");
797 ASSERT_EQ(new_flags, expected_flags);
798
799 new_flags = read_mnt_flags("/mnt/A/AA");
800 ASSERT_EQ(new_flags, expected_flags);
801
802 new_flags = read_mnt_flags("/mnt/A/AA/B");
803 ASSERT_EQ(new_flags, expected_flags);
804
805 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
806 ASSERT_EQ(new_flags, expected_flags);
807
808 memset(&attr, 0, sizeof(attr));
809 attr.attr_set &= ~MOUNT_ATTR_NOATIME;
810 attr.attr_set |= MOUNT_ATTR_RELATIME;
811 attr.attr_clr |= MOUNT_ATTR__ATIME;
812 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
813
814 expected_flags &= ~MS_NOATIME;
815 expected_flags |= MS_RELATIME;
816
817 new_flags = read_mnt_flags("/mnt/A");
818 ASSERT_EQ(new_flags, expected_flags);
819
820 new_flags = read_mnt_flags("/mnt/A/AA");
821 ASSERT_EQ(new_flags, expected_flags);
822
823 new_flags = read_mnt_flags("/mnt/A/AA/B");
824 ASSERT_EQ(new_flags, expected_flags);
825
826 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
827 ASSERT_EQ(new_flags, expected_flags);
828
829 memset(&attr, 0, sizeof(attr));
830 attr.attr_set &= ~MOUNT_ATTR_RELATIME;
831 attr.attr_set |= MOUNT_ATTR_STRICTATIME;
832 attr.attr_clr |= MOUNT_ATTR__ATIME;
833 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
834
835 expected_flags &= ~MS_RELATIME;
836
837 new_flags = read_mnt_flags("/mnt/A");
838 ASSERT_EQ(new_flags, expected_flags);
839
840 new_flags = read_mnt_flags("/mnt/A/AA");
841 ASSERT_EQ(new_flags, expected_flags);
842
843 new_flags = read_mnt_flags("/mnt/A/AA/B");
844 ASSERT_EQ(new_flags, expected_flags);
845
846 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
847 ASSERT_EQ(new_flags, expected_flags);
848
849 memset(&attr, 0, sizeof(attr));
850 attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
851 attr.attr_set |= MOUNT_ATTR_NOATIME;
852 attr.attr_clr |= MOUNT_ATTR__ATIME;
853 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
854
855 expected_flags |= MS_NOATIME;
856 new_flags = read_mnt_flags("/mnt/A");
857 ASSERT_EQ(new_flags, expected_flags);
858
859 new_flags = read_mnt_flags("/mnt/A/AA");
860 ASSERT_EQ(new_flags, expected_flags);
861
862 new_flags = read_mnt_flags("/mnt/A/AA/B");
863 ASSERT_EQ(new_flags, expected_flags);
864
865 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
866 ASSERT_EQ(new_flags, expected_flags);
867
868 memset(&attr, 0, sizeof(attr));
869 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
870
871 new_flags = read_mnt_flags("/mnt/A");
872 ASSERT_EQ(new_flags, expected_flags);
873
874 new_flags = read_mnt_flags("/mnt/A/AA");
875 ASSERT_EQ(new_flags, expected_flags);
876
877 new_flags = read_mnt_flags("/mnt/A/AA/B");
878 ASSERT_EQ(new_flags, expected_flags);
879
880 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
881 ASSERT_EQ(new_flags, expected_flags);
882
883 memset(&attr, 0, sizeof(attr));
884 attr.attr_clr = MOUNT_ATTR_NODIRATIME;
885 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
886
887 expected_flags &= ~MS_NODIRATIME;
888
889 new_flags = read_mnt_flags("/mnt/A");
890 ASSERT_EQ(new_flags, expected_flags);
891
892 new_flags = read_mnt_flags("/mnt/A/AA");
893 ASSERT_EQ(new_flags, expected_flags);
894
895 new_flags = read_mnt_flags("/mnt/A/AA/B");
896 ASSERT_EQ(new_flags, expected_flags);
897
898 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
899 ASSERT_EQ(new_flags, expected_flags);
900 }
901
TEST_F(mount_setattr,multi_threaded)902 TEST_F(mount_setattr, multi_threaded)
903 {
904 int i, j, nthreads, ret = 0;
905 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
906 pthread_attr_t pattr;
907 pthread_t threads[DEFAULT_THREADS];
908
909 if (!mount_setattr_supported())
910 SKIP(return, "mount_setattr syscall not supported");
911
912 old_flags = read_mnt_flags("/mnt/A");
913 ASSERT_GT(old_flags, 0);
914
915 /* Try to change mount options from multiple threads. */
916 nthreads = get_nprocs_conf();
917 if (nthreads > DEFAULT_THREADS)
918 nthreads = DEFAULT_THREADS;
919
920 pthread_attr_init(&pattr);
921 for (i = 0; i < nthreads; i++)
922 ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
923
924 for (j = 0; j < i; j++) {
925 void *retptr = NULL;
926
927 EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
928
929 ret += ptr_to_int(retptr);
930 EXPECT_EQ(ret, 0);
931 }
932 pthread_attr_destroy(&pattr);
933
934 ASSERT_EQ(ret, 0);
935
936 expected_flags = old_flags;
937 expected_flags |= MS_RDONLY;
938 expected_flags |= MS_NOSUID;
939 new_flags = read_mnt_flags("/mnt/A");
940 ASSERT_EQ(new_flags, expected_flags);
941
942 ASSERT_EQ(is_shared_mount("/mnt/A"), true);
943
944 new_flags = read_mnt_flags("/mnt/A/AA");
945 ASSERT_EQ(new_flags, expected_flags);
946
947 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
948
949 new_flags = read_mnt_flags("/mnt/A/AA/B");
950 ASSERT_EQ(new_flags, expected_flags);
951
952 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
953
954 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
955 ASSERT_EQ(new_flags, expected_flags);
956
957 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
958 }
959
TEST_F(mount_setattr,wrong_user_namespace)960 TEST_F(mount_setattr, wrong_user_namespace)
961 {
962 int ret;
963 struct mount_attr attr = {
964 .attr_set = MOUNT_ATTR_RDONLY,
965 };
966
967 if (!mount_setattr_supported())
968 SKIP(return, "mount_setattr syscall not supported");
969
970 EXPECT_EQ(create_and_enter_userns(), 0);
971 ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
972 ASSERT_LT(ret, 0);
973 ASSERT_EQ(errno, EPERM);
974 }
975
TEST_F(mount_setattr,wrong_mount_namespace)976 TEST_F(mount_setattr, wrong_mount_namespace)
977 {
978 int fd, ret;
979 struct mount_attr attr = {
980 .attr_set = MOUNT_ATTR_RDONLY,
981 };
982
983 if (!mount_setattr_supported())
984 SKIP(return, "mount_setattr syscall not supported");
985
986 fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
987 ASSERT_GE(fd, 0);
988
989 ASSERT_EQ(unshare(CLONE_NEWNS), 0);
990
991 ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
992 ASSERT_LT(ret, 0);
993 ASSERT_EQ(errno, EINVAL);
994 }
995
/* Fixture for the idmapped-mount tests; it carries no per-test data. */
FIXTURE(mount_setattr_idmapped)
{
};
998
FIXTURE_SETUP(mount_setattr_idmapped)999 FIXTURE_SETUP(mount_setattr_idmapped)
1000 {
1001 int img_fd = -EBADF;
1002
1003 ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1004
1005 ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
1006
1007 (void)umount2("/mnt", MNT_DETACH);
1008 (void)umount2("/tmp", MNT_DETACH);
1009
1010 ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
1011 "size=100000,mode=700"), 0);
1012
1013 ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
1014 ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
1015 ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
1016
1017 ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
1018 "size=100000,mode=700"), 0);
1019
1020 ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
1021 ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
1022 ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
1023
1024 ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
1025 "size=100000,mode=700"), 0);
1026
1027 ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
1028 "size=100000,mode=700"), 0);
1029
1030 ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
1031
1032 ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
1033 "size=100000,mode=700"), 0);
1034
1035 ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1036
1037 ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1038
1039 ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
1040
1041 ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
1042 MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
1043
1044 ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
1045
1046 ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
1047 MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
1048
1049 ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
1050 ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
1051 img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
1052 ASSERT_GE(img_fd, 0);
1053 ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
1054 ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
1055 ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
1056 ASSERT_EQ(close(img_fd), 0);
1057 }
1058
/* Lazily detach /mnt/A and /tmp again; failures are ignored. */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const detach[] = { "/mnt/A", "/tmp" };
	size_t i;

	for (i = 0; i < sizeof(detach) / sizeof(detach[0]); i++)
		(void)umount2(detach[i], MNT_DETACH);
}
1064
1065 /**
1066 * Validate that negative fd values are rejected.
1067 */
TEST_F(mount_setattr_idmapped,invalid_fd_negative)1068 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1069 {
1070 struct mount_attr attr = {
1071 .attr_set = MOUNT_ATTR_IDMAP,
1072 .userns_fd = -EBADF,
1073 };
1074
1075 if (!mount_setattr_supported())
1076 SKIP(return, "mount_setattr syscall not supported");
1077
1078 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1079 TH_LOG("failure: created idmapped mount with negative fd");
1080 }
1081 }
1082
1083 /**
1084 * Validate that excessively large fd values are rejected.
1085 */
TEST_F(mount_setattr_idmapped,invalid_fd_large)1086 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1087 {
1088 struct mount_attr attr = {
1089 .attr_set = MOUNT_ATTR_IDMAP,
1090 .userns_fd = INT64_MAX,
1091 };
1092
1093 if (!mount_setattr_supported())
1094 SKIP(return, "mount_setattr syscall not supported");
1095
1096 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1097 TH_LOG("failure: created idmapped mount with too large fd value");
1098 }
1099 }
1100
1101 /**
1102 * Validate that closed fd values are rejected.
1103 */
TEST_F(mount_setattr_idmapped,invalid_fd_closed)1104 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1105 {
1106 int fd;
1107 struct mount_attr attr = {
1108 .attr_set = MOUNT_ATTR_IDMAP,
1109 };
1110
1111 if (!mount_setattr_supported())
1112 SKIP(return, "mount_setattr syscall not supported");
1113
1114 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1115 ASSERT_GE(fd, 0);
1116 ASSERT_GE(close(fd), 0);
1117
1118 attr.userns_fd = fd;
1119 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1120 TH_LOG("failure: created idmapped mount with closed fd");
1121 }
1122 }
1123
1124 /**
1125 * Validate that the initial user namespace is rejected.
1126 */
TEST_F(mount_setattr_idmapped,invalid_fd_initial_userns)1127 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1128 {
1129 int open_tree_fd = -EBADF;
1130 struct mount_attr attr = {
1131 .attr_set = MOUNT_ATTR_IDMAP,
1132 };
1133
1134 if (!mount_setattr_supported())
1135 SKIP(return, "mount_setattr syscall not supported");
1136
1137 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1138 AT_NO_AUTOMOUNT |
1139 AT_SYMLINK_NOFOLLOW |
1140 OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1141 ASSERT_GE(open_tree_fd, 0);
1142
1143 attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1144 ASSERT_GE(attr.userns_fd, 0);
1145 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1146 ASSERT_EQ(errno, EPERM);
1147 ASSERT_EQ(close(attr.userns_fd), 0);
1148 ASSERT_EQ(close(open_tree_fd), 0);
1149 }
1150
/*
 * map_ids - install one identical ID mapping for both uids and gids of @pid.
 * @pid:    process whose /proc/<pid>/uid_map and gid_map are written
 * @nsid:   first field of the mapping line
 * @hostid: second field of the mapping line
 * @range:  third field of the mapping line
 *
 * The line "<nsid> <hostid> <range>" is written to uid_map first and then to
 * gid_map.
 *
 * Return: 0 when both writes succeed, -1 as soon as write_file() reports a
 * failure (gid_map is not attempted if uid_map failed).
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char mapping[100], target[256];
	size_t len;

	/* Both map files receive exactly the same line, so build it once. */
	snprintf(mapping, sizeof(mapping), "%lu %lu %lu", nsid, hostid, range);
	len = strlen(mapping);

	snprintf(target, sizeof(target), "/proc/%d/uid_map", pid);
	if (write_file(target, mapping, len))
		return -1;

	snprintf(target, sizeof(target), "/proc/%d/gid_map", pid);
	if (write_file(target, mapping, len))
		return -1;

	return 0;
}
1169
#define __STACK_SIZE (8 * 1024 * 1024)

#ifndef CLONE_VM
#define CLONE_VM 0x00000100
#endif

/*
 * do_clone - run @fn(@arg) in a child created with clone(2).
 * @fn:    entry point executed by the child
 * @arg:   opaque pointer handed to @fn
 * @flags: clone flags; SIGCHLD is always OR-ed in
 *
 * A __STACK_SIZE byte stack is allocated for the child. Unless the child
 * shares the caller's address space (CLONE_VM), the child runs on its own
 * copy of that memory, so the caller's allocation is released again before
 * returning. It is also released when the clone itself fails. With CLONE_VM
 * the stack has to outlive this call and is intentionally left allocated.
 *
 * Return: the child's pid in the caller; -ENOMEM (errno set to ENOMEM) if the
 * stack cannot be allocated; otherwise the negative result of clone() with
 * errno describing the failure.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;
	pid_t pid;
	int saved_errno;

	stack = malloc(__STACK_SIZE);
	if (!stack) {
		/* Callers derive their error code from errno. */
		errno = ENOMEM;
		return -ENOMEM;
	}

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* Pass the highest address of the allocation as the stack top. */
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	if (pid < 0 || !(flags & CLONE_VM)) {
		/* Keep clone()'s errno intact for the caller. */
		saved_errno = errno;
		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1185
/*
 * Child entry point used by get_userns_fd(): the process sends SIGSTOP to
 * itself and returns kill()'s result. @data is unused.
 */
static int get_userns_fd_cb(void *data)
{
	const pid_t self = getpid();

	return kill(self, SIGSTOP);
}
1190
/*
 * wait_for_pid - reap @pid and report how it exited.
 *
 * waitpid() is retried for as long as it is interrupted (EINTR).
 *
 * Return: the child's exit status if it terminated through a normal exit,
 * -1 if waitpid() failed for any other reason or the child did not exit
 * normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1209
get_userns_fd(unsigned long nsid,unsigned long hostid,unsigned long range)1210 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1211 {
1212 int ret;
1213 pid_t pid;
1214 char path[256];
1215
1216 pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1217 if (pid < 0)
1218 return -errno;
1219
1220 ret = map_ids(pid, nsid, hostid, range);
1221 if (ret < 0)
1222 return ret;
1223
1224 snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1225 ret = open(path, O_RDONLY | O_CLOEXEC);
1226 kill(pid, SIGKILL);
1227 wait_for_pid(pid);
1228 return ret;
1229 }
1230
1231 /**
1232 * Validate that an attached mount in our mount namespace cannot be idmapped.
1233 * (The kernel enforces that the mount's mount namespace and the caller's mount
1234 * namespace match.)
1235 */
TEST_F(mount_setattr_idmapped,attached_mount_inside_current_mount_namespace)1236 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1237 {
1238 int open_tree_fd = -EBADF;
1239 struct mount_attr attr = {
1240 .attr_set = MOUNT_ATTR_IDMAP,
1241 };
1242
1243 if (!mount_setattr_supported())
1244 SKIP(return, "mount_setattr syscall not supported");
1245
1246 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1247 AT_EMPTY_PATH |
1248 AT_NO_AUTOMOUNT |
1249 AT_SYMLINK_NOFOLLOW |
1250 OPEN_TREE_CLOEXEC);
1251 ASSERT_GE(open_tree_fd, 0);
1252
1253 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1254 ASSERT_GE(attr.userns_fd, 0);
1255 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1256 ASSERT_EQ(close(attr.userns_fd), 0);
1257 ASSERT_EQ(close(open_tree_fd), 0);
1258 }
1259
1260 /**
1261 * Validate that idmapping a mount is rejected if the mount's mount namespace
1262 * and our mount namespace don't match.
1263 * (The kernel enforces that the mount's mount namespace and the caller's mount
1264 * namespace match.)
1265 */
TEST_F(mount_setattr_idmapped,attached_mount_outside_current_mount_namespace)1266 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1267 {
1268 int open_tree_fd = -EBADF;
1269 struct mount_attr attr = {
1270 .attr_set = MOUNT_ATTR_IDMAP,
1271 };
1272
1273 if (!mount_setattr_supported())
1274 SKIP(return, "mount_setattr syscall not supported");
1275
1276 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1277 AT_EMPTY_PATH |
1278 AT_NO_AUTOMOUNT |
1279 AT_SYMLINK_NOFOLLOW |
1280 OPEN_TREE_CLOEXEC);
1281 ASSERT_GE(open_tree_fd, 0);
1282
1283 ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1284
1285 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1286 ASSERT_GE(attr.userns_fd, 0);
1287 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1288 sizeof(attr)), 0);
1289 ASSERT_EQ(close(attr.userns_fd), 0);
1290 ASSERT_EQ(close(open_tree_fd), 0);
1291 }
1292
1293 /**
1294 * Validate that an attached mount in our mount namespace can be idmapped.
1295 */
TEST_F(mount_setattr_idmapped,detached_mount_inside_current_mount_namespace)1296 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1297 {
1298 int open_tree_fd = -EBADF;
1299 struct mount_attr attr = {
1300 .attr_set = MOUNT_ATTR_IDMAP,
1301 };
1302
1303 if (!mount_setattr_supported())
1304 SKIP(return, "mount_setattr syscall not supported");
1305
1306 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1307 AT_EMPTY_PATH |
1308 AT_NO_AUTOMOUNT |
1309 AT_SYMLINK_NOFOLLOW |
1310 OPEN_TREE_CLOEXEC |
1311 OPEN_TREE_CLONE);
1312 ASSERT_GE(open_tree_fd, 0);
1313
1314 /* Changing mount properties on a detached mount. */
1315 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1316 ASSERT_GE(attr.userns_fd, 0);
1317 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1318 AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1319 ASSERT_EQ(close(attr.userns_fd), 0);
1320 ASSERT_EQ(close(open_tree_fd), 0);
1321 }
1322
1323 /**
1324 * Validate that a detached mount not in our mount namespace can be idmapped.
1325 */
TEST_F(mount_setattr_idmapped,detached_mount_outside_current_mount_namespace)1326 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1327 {
1328 int open_tree_fd = -EBADF;
1329 struct mount_attr attr = {
1330 .attr_set = MOUNT_ATTR_IDMAP,
1331 };
1332
1333 if (!mount_setattr_supported())
1334 SKIP(return, "mount_setattr syscall not supported");
1335
1336 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1337 AT_EMPTY_PATH |
1338 AT_NO_AUTOMOUNT |
1339 AT_SYMLINK_NOFOLLOW |
1340 OPEN_TREE_CLOEXEC |
1341 OPEN_TREE_CLONE);
1342 ASSERT_GE(open_tree_fd, 0);
1343
1344 ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1345
1346 /* Changing mount properties on a detached mount. */
1347 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1348 ASSERT_GE(attr.userns_fd, 0);
1349 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1350 AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1351 ASSERT_EQ(close(attr.userns_fd), 0);
1352 ASSERT_EQ(close(open_tree_fd), 0);
1353 }
1354
1355 /**
1356 * Validate that currently changing the idmapping of an idmapped mount fails.
1357 */
TEST_F(mount_setattr_idmapped,change_idmapping)1358 TEST_F(mount_setattr_idmapped, change_idmapping)
1359 {
1360 int open_tree_fd = -EBADF;
1361 struct mount_attr attr = {
1362 .attr_set = MOUNT_ATTR_IDMAP,
1363 };
1364
1365 if (!mount_setattr_supported())
1366 SKIP(return, "mount_setattr syscall not supported");
1367
1368 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1369 AT_EMPTY_PATH |
1370 AT_NO_AUTOMOUNT |
1371 AT_SYMLINK_NOFOLLOW |
1372 OPEN_TREE_CLOEXEC |
1373 OPEN_TREE_CLONE);
1374 ASSERT_GE(open_tree_fd, 0);
1375
1376 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1377 ASSERT_GE(attr.userns_fd, 0);
1378 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1379 AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1380 ASSERT_EQ(close(attr.userns_fd), 0);
1381
1382 /* Change idmapping on a detached mount that is already idmapped. */
1383 attr.userns_fd = get_userns_fd(0, 20000, 10000);
1384 ASSERT_GE(attr.userns_fd, 0);
1385 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1386 ASSERT_EQ(close(attr.userns_fd), 0);
1387 ASSERT_EQ(close(open_tree_fd), 0);
1388 }
1389
/*
 * expected_uid_gid - check the ownership of @path as seen through @dfd.
 * @dfd:          directory fd that @path is resolved against (fstatat())
 * @path:         path to inspect
 * @flags:        flags forwarded to fstatat()
 * @expected_uid: owner the file must have
 * @expected_gid: group the file must have
 *
 * Return: true only if fstatat() succeeds and both the uid and the gid match;
 * false if the lookup fails or either ID differs.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1402
TEST_F(mount_setattr_idmapped,idmap_mount_tree_invalid)1403 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1404 {
1405 int open_tree_fd = -EBADF;
1406 struct mount_attr attr = {
1407 .attr_set = MOUNT_ATTR_IDMAP,
1408 };
1409
1410 if (!mount_setattr_supported())
1411 SKIP(return, "mount_setattr syscall not supported");
1412
1413 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1414 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1415
1416 open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1417 AT_RECURSIVE |
1418 AT_EMPTY_PATH |
1419 AT_NO_AUTOMOUNT |
1420 AT_SYMLINK_NOFOLLOW |
1421 OPEN_TREE_CLOEXEC |
1422 OPEN_TREE_CLONE);
1423 ASSERT_GE(open_tree_fd, 0);
1424
1425 attr.userns_fd = get_userns_fd(0, 10000, 10000);
1426 ASSERT_GE(attr.userns_fd, 0);
1427 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1428 ASSERT_EQ(close(attr.userns_fd), 0);
1429 ASSERT_EQ(close(open_tree_fd), 0);
1430
1431 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1432 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1433 ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1434 ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1435 }
1436
TEST_F(mount_setattr,mount_attr_nosymfollow)1437 TEST_F(mount_setattr, mount_attr_nosymfollow)
1438 {
1439 int fd;
1440 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1441 struct mount_attr attr = {
1442 .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
1443 };
1444
1445 if (!mount_setattr_supported())
1446 SKIP(return, "mount_setattr syscall not supported");
1447
1448 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1449 ASSERT_GT(fd, 0);
1450 ASSERT_EQ(close(fd), 0);
1451
1452 old_flags = read_mnt_flags("/mnt/A");
1453 ASSERT_GT(old_flags, 0);
1454
1455 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1456
1457 expected_flags = old_flags;
1458 expected_flags |= ST_NOSYMFOLLOW;
1459
1460 new_flags = read_mnt_flags("/mnt/A");
1461 ASSERT_EQ(new_flags, expected_flags);
1462
1463 new_flags = read_mnt_flags("/mnt/A/AA");
1464 ASSERT_EQ(new_flags, expected_flags);
1465
1466 new_flags = read_mnt_flags("/mnt/A/AA/B");
1467 ASSERT_EQ(new_flags, expected_flags);
1468
1469 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1470 ASSERT_EQ(new_flags, expected_flags);
1471
1472 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1473 ASSERT_LT(fd, 0);
1474 ASSERT_EQ(errno, ELOOP);
1475
1476 attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1477 attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1478
1479 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1480
1481 expected_flags &= ~ST_NOSYMFOLLOW;
1482 new_flags = read_mnt_flags("/mnt/A");
1483 ASSERT_EQ(new_flags, expected_flags);
1484
1485 new_flags = read_mnt_flags("/mnt/A/AA");
1486 ASSERT_EQ(new_flags, expected_flags);
1487
1488 new_flags = read_mnt_flags("/mnt/A/AA/B");
1489 ASSERT_EQ(new_flags, expected_flags);
1490
1491 new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1492 ASSERT_EQ(new_flags, expected_flags);
1493
1494 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1495 ASSERT_GT(fd, 0);
1496 ASSERT_EQ(close(fd), 0);
1497 }
1498
1499 TEST_HARNESS_MAIN
1500