1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #define __EXPORTED_HEADERS__
4
5 #include <errno.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <linux/falloc.h>
9 #include <fcntl.h>
10 #include <linux/memfd.h>
11 #include <sched.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <signal.h>
15 #include <string.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18 #include <sys/syscall.h>
19 #include <sys/wait.h>
20 #include <unistd.h>
21
22 #include "common.h"
23
/* Prefixes and suffixes printed in the per-test banners. */
#define MEMFD_STR	"memfd:"
#define MEMFD_HUGE_STR	"memfd-hugetlb:"
#define SHARED_FT_STR	"(shared file-table)"

/* Default memfd size and the size of the stacks handed to clone(). */
#define MFD_DEF_SIZE	8192
#define STACK_SIZE	65536

/* Seal bit for the exec seal, defined locally for this test. */
#define F_SEAL_EXEC	0x0020

/* Seal set referenced by the second half of test_exec_seal(). */
#define F_WX_SEALS (F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | \
		    F_SEAL_FUTURE_WRITE | F_SEAL_EXEC)

/* memfd_create() flag, defined locally for this test. */
#define MFD_NOEXEC_SEAL	0x0008U

/*
 * Defaults for a run that does not test hugetlbfs; main() replaces both
 * values when the "hugetlbfs" option is given.
 */
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;
46
/*
 * fd2name() - look up the name behind an open file descriptor
 * @fd:      descriptor to resolve
 * @buf:     destination for the NUL-terminated link target
 * @bufsize: capacity of @buf in bytes; must be at least 1
 *
 * Reads the /proc/self/fd/<fd> symlink into @buf. readlink() does not
 * terminate its output, so one byte of @buf is kept back for the '\0'.
 * Any failure aborts the test.
 *
 * Return: number of bytes stored in @buf, not counting the terminator.
 */
static ssize_t fd2name(int fd, char *buf, size_t bufsize)
{
	char link_path[PATH_MAX];
	ssize_t nbytes;
	int size;

	/* "bufsize - 1" below would wrap around for an empty buffer. */
	if (bufsize == 0) {
		printf("fd2name(%d) called with an empty buffer\n", fd);
		abort();
	}

	size = snprintf(link_path, sizeof(link_path), "/proc/self/fd/%d", fd);
	if (size < 0) {
		printf("snprintf(%d) failed on %m\n", fd);
		abort();
	}

	/*
	 * reserve one byte for string termination.
	 */
	nbytes = readlink(link_path, buf, bufsize - 1);
	if (nbytes == -1) {
		printf("readlink(%s) failed %m\n", link_path);
		abort();
	}

	buf[nbytes] = '\0';
	return nbytes;
}
70
/*
 * Create a memfd named @name with @flags and resize it to @sz bytes.
 * Aborts the test if either step fails.
 *
 * Return: the new file descriptor.
 */
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0) {
		printf("memfd_create(\"%s\", %u) failed: %m\n", name, flags);
		abort();
	}

	if (ftruncate(fd, sz) < 0) {
		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
		abort();
	}

	return fd;
}
90
/*
 * Write @val to /proc/sys/vm/memfd_noexec and abort the test if the file
 * cannot be opened or the write fails. The sysctl descriptor is closed
 * again before returning.
 */
static void sysctl_assert_write(const char *val)
{
	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed\n");
		abort();
	}

	if (write(fd, val, strlen(val)) < 0) {
		printf("write sysctl failed\n");
		abort();
	}

	/* This helper is called several times; do not leak one fd per call. */
	close(fd);
}
105
/*
 * Try to write @val to /proc/sys/vm/memfd_noexec and abort the test if the
 * write unexpectedly succeeds (or the file cannot be opened at all). The
 * sysctl descriptor is closed again before returning.
 */
static void sysctl_fail_write(const char *val)
{
	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);

	if (fd < 0) {
		printf("open sysctl failed\n");
		abort();
	}

	if (write(fd, val, strlen(val)) >= 0) {
		printf("write sysctl %s succeeded, but failure expected\n",
		       val);
		abort();
	}

	/* This helper is called several times; do not leak one fd per call. */
	close(fd);
}
121
/*
 * Open a second, read-write file descriptor for @fd_in by going through
 * its /proc/self/fd entry. Aborts the test if the open fails.
 *
 * Return: the newly opened descriptor.
 */
static int mfd_assert_reopen_fd(int fd_in)
{
	char path[100];
	int reopened;

	sprintf(path, "/proc/self/fd/%d", fd_in);

	reopened = open(path, O_RDWR);
	if (reopened >= 0)
		return reopened;

	printf("re-open of existing fd %d failed\n", fd_in);
	abort();
}
137
/*
 * Call memfd_create() with @name and @flags and abort the test if the
 * call succeeds; the combination is expected to be rejected.
 */
static void mfd_fail_new(const char *name, unsigned int flags)
{
	int fd = sys_memfd_create(name, flags);

	if (fd < 0)
		return;

	printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
	       name, flags);
	close(fd);
	abort();
}
150
/*
 * Query the seals of @fd with F_GET_SEALS and abort the test if the
 * fcntl() fails.
 *
 * Return: the seal bitmask reported for @fd.
 */
static unsigned int mfd_assert_get_seals(int fd)
{
	int seals = fcntl(fd, F_GET_SEALS);

	if (seals >= 0)
		return (unsigned int)seals;

	printf("GET_SEALS(%d) failed: %m\n", fd);
	abort();
}
163
/*
 * Abort the test unless the seals currently set on @fd are exactly @seals.
 * The name of the file behind @fd is resolved up front so it can be shown
 * in the failure message.
 */
static void mfd_assert_has_seals(int fd, unsigned int seals)
{
	char buf[PATH_MAX];
	unsigned int s;

	fd2name(fd, buf, PATH_MAX);

	s = mfd_assert_get_seals(fd);
	if (s != seals) {
		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
		abort();
	}
}
177
/*
 * Add @seals to @fd with F_ADD_SEALS and abort the test if that fails.
 */
static void mfd_assert_add_seals(int fd, unsigned int seals)
{
	/* Read the current seals first; they only appear in the error text. */
	unsigned int before = mfd_assert_get_seals(fd);

	if (fcntl(fd, F_ADD_SEALS, seals) < 0) {
		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n",
		       fd, before, seals);
		abort();
	}
}
190
/*
 * Try to add @seals to @fd and abort the test if F_ADD_SEALS succeeds.
 * The current seals are fetched only for the error message; if they
 * cannot be read, 0 is reported instead.
 */
static void mfd_fail_add_seals(int fd, unsigned int seals)
{
	int r = fcntl(fd, F_GET_SEALS);
	unsigned int before = (r < 0) ? 0 : (unsigned int)r;

	if (fcntl(fd, F_ADD_SEALS, seals) < 0)
		return;

	printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
	       fd, before, seals);
	abort();
}
209
/*
 * Abort the test unless fstat() reports that @fd is exactly @size bytes.
 */
static void mfd_assert_size(int fd, size_t size)
{
	struct stat st;

	if (fstat(fd, &st) < 0) {
		printf("fstat(%d) failed: %m\n", fd);
		abort();
	}

	if (st.st_size != size) {
		printf("wrong file size %lld, but expected %lld\n",
		       (long long)st.st_size, (long long)size);
		abort();
	}
}
225
/*
 * Duplicate @fd with dup() and abort the test on failure.
 *
 * Return: the duplicated descriptor.
 */
static int mfd_assert_dup(int fd)
{
	int copy = dup(fd);

	if (copy >= 0)
		return copy;

	printf("dup(%d) failed: %m\n", fd);
	abort();
}
238
mfd_assert_mmap_shared(int fd)239 static void *mfd_assert_mmap_shared(int fd)
240 {
241 void *p;
242
243 p = mmap(NULL,
244 mfd_def_size,
245 PROT_READ | PROT_WRITE,
246 MAP_SHARED,
247 fd,
248 0);
249 if (p == MAP_FAILED) {
250 printf("mmap() failed: %m\n");
251 abort();
252 }
253
254 return p;
255 }
256
mfd_assert_mmap_private(int fd)257 static void *mfd_assert_mmap_private(int fd)
258 {
259 void *p;
260
261 p = mmap(NULL,
262 mfd_def_size,
263 PROT_READ,
264 MAP_PRIVATE,
265 fd,
266 0);
267 if (p == MAP_FAILED) {
268 printf("mmap() failed: %m\n");
269 abort();
270 }
271
272 return p;
273 }
274
/*
 * Open /proc/self/fd/<fd> with the given open() @flags and @mode and
 * abort the test if that fails.
 *
 * Return: the newly opened descriptor.
 */
static int mfd_assert_open(int fd, int flags, mode_t mode)
{
	char path[512];
	int opened;

	sprintf(path, "/proc/self/fd/%d", fd);

	opened = open(path, flags, mode);
	if (opened >= 0)
		return opened;

	printf("open(%s) failed: %m\n", path);
	abort();
}
289
/*
 * Try to open /proc/self/fd/<fd> with the given open() @flags and @mode
 * and abort the test if the open succeeds.
 */
static void mfd_fail_open(int fd, int flags, mode_t mode)
{
	char path[512];

	sprintf(path, "/proc/self/fd/%d", fd);

	if (open(path, flags, mode) < 0)
		return;

	printf("open(%s) didn't fail as expected\n", path);
	abort();
}
302
mfd_assert_read(int fd)303 static void mfd_assert_read(int fd)
304 {
305 char buf[16];
306 void *p;
307 ssize_t l;
308
309 l = read(fd, buf, sizeof(buf));
310 if (l != sizeof(buf)) {
311 printf("read() failed: %m\n");
312 abort();
313 }
314
315 /* verify PROT_READ *is* allowed */
316 p = mmap(NULL,
317 mfd_def_size,
318 PROT_READ,
319 MAP_PRIVATE,
320 fd,
321 0);
322 if (p == MAP_FAILED) {
323 printf("mmap() failed: %m\n");
324 abort();
325 }
326 munmap(p, mfd_def_size);
327
328 /* verify MAP_PRIVATE is *always* allowed (even writable) */
329 p = mmap(NULL,
330 mfd_def_size,
331 PROT_READ | PROT_WRITE,
332 MAP_PRIVATE,
333 fd,
334 0);
335 if (p == MAP_FAILED) {
336 printf("mmap() failed: %m\n");
337 abort();
338 }
339 munmap(p, mfd_def_size);
340 }
341
342 /* Test that PROT_READ + MAP_SHARED mappings work. */
mfd_assert_read_shared(int fd)343 static void mfd_assert_read_shared(int fd)
344 {
345 void *p;
346
347 /* verify PROT_READ and MAP_SHARED *is* allowed */
348 p = mmap(NULL,
349 mfd_def_size,
350 PROT_READ,
351 MAP_SHARED,
352 fd,
353 0);
354 if (p == MAP_FAILED) {
355 printf("mmap() failed: %m\n");
356 abort();
357 }
358 munmap(p, mfd_def_size);
359 }
360
mfd_assert_fork_private_write(int fd)361 static void mfd_assert_fork_private_write(int fd)
362 {
363 int *p;
364 pid_t pid;
365
366 p = mmap(NULL,
367 mfd_def_size,
368 PROT_READ | PROT_WRITE,
369 MAP_PRIVATE,
370 fd,
371 0);
372 if (p == MAP_FAILED) {
373 printf("mmap() failed: %m\n");
374 abort();
375 }
376
377 p[0] = 22;
378
379 pid = fork();
380 if (pid == 0) {
381 p[0] = 33;
382 exit(0);
383 } else {
384 waitpid(pid, NULL, 0);
385
386 if (p[0] != 22) {
387 printf("MAP_PRIVATE copy-on-write failed: %m\n");
388 abort();
389 }
390 }
391
392 munmap(p, mfd_def_size);
393 }
394
mfd_assert_write(int fd)395 static void mfd_assert_write(int fd)
396 {
397 ssize_t l;
398 void *p;
399 int r;
400
401 /*
402 * huegtlbfs does not support write, but we want to
403 * verify everything else here.
404 */
405 if (!hugetlbfs_test) {
406 /* verify write() succeeds */
407 l = write(fd, "\0\0\0\0", 4);
408 if (l != 4) {
409 printf("write() failed: %m\n");
410 abort();
411 }
412 }
413
414 /* verify PROT_READ | PROT_WRITE is allowed */
415 p = mmap(NULL,
416 mfd_def_size,
417 PROT_READ | PROT_WRITE,
418 MAP_SHARED,
419 fd,
420 0);
421 if (p == MAP_FAILED) {
422 printf("mmap() failed: %m\n");
423 abort();
424 }
425 *(char *)p = 0;
426 munmap(p, mfd_def_size);
427
428 /* verify PROT_WRITE is allowed */
429 p = mmap(NULL,
430 mfd_def_size,
431 PROT_WRITE,
432 MAP_SHARED,
433 fd,
434 0);
435 if (p == MAP_FAILED) {
436 printf("mmap() failed: %m\n");
437 abort();
438 }
439 *(char *)p = 0;
440 munmap(p, mfd_def_size);
441
442 /* verify PROT_READ with MAP_SHARED is allowed and a following
443 * mprotect(PROT_WRITE) allows writing */
444 p = mmap(NULL,
445 mfd_def_size,
446 PROT_READ,
447 MAP_SHARED,
448 fd,
449 0);
450 if (p == MAP_FAILED) {
451 printf("mmap() failed: %m\n");
452 abort();
453 }
454
455 r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
456 if (r < 0) {
457 printf("mprotect() failed: %m\n");
458 abort();
459 }
460
461 *(char *)p = 0;
462 munmap(p, mfd_def_size);
463
464 /* verify PUNCH_HOLE works */
465 r = fallocate(fd,
466 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
467 0,
468 mfd_def_size);
469 if (r < 0) {
470 printf("fallocate(PUNCH_HOLE) failed: %m\n");
471 abort();
472 }
473 }
474
mfd_fail_write(int fd)475 static void mfd_fail_write(int fd)
476 {
477 ssize_t l;
478 void *p;
479 int r;
480
481 /* verify write() fails */
482 l = write(fd, "data", 4);
483 if (l != -EPERM) {
484 printf("expected EPERM on write(), but got %d: %m\n", (int)l);
485 abort();
486 }
487
488 /* verify PROT_READ | PROT_WRITE is not allowed */
489 p = mmap(NULL,
490 mfd_def_size,
491 PROT_READ | PROT_WRITE,
492 MAP_SHARED,
493 fd,
494 0);
495 if (p != MAP_FAILED) {
496 printf("mmap() didn't fail as expected\n");
497 abort();
498 }
499
500 /* verify PROT_WRITE is not allowed */
501 p = mmap(NULL,
502 mfd_def_size,
503 PROT_WRITE,
504 MAP_SHARED,
505 fd,
506 0);
507 if (p != MAP_FAILED) {
508 printf("mmap() didn't fail as expected\n");
509 abort();
510 }
511
512 /* Verify PROT_READ with MAP_SHARED with a following mprotect is not
513 * allowed. Note that for r/w the kernel already prevents the mmap. */
514 p = mmap(NULL,
515 mfd_def_size,
516 PROT_READ,
517 MAP_SHARED,
518 fd,
519 0);
520 if (p != MAP_FAILED) {
521 r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
522 if (r >= 0) {
523 printf("mmap()+mprotect() didn't fail as expected\n");
524 abort();
525 }
526 munmap(p, mfd_def_size);
527 }
528
529 /* verify PUNCH_HOLE fails */
530 r = fallocate(fd,
531 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
532 0,
533 mfd_def_size);
534 if (r >= 0) {
535 printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
536 abort();
537 }
538 }
539
mfd_assert_shrink(int fd)540 static void mfd_assert_shrink(int fd)
541 {
542 int r, fd2;
543
544 r = ftruncate(fd, mfd_def_size / 2);
545 if (r < 0) {
546 printf("ftruncate(SHRINK) failed: %m\n");
547 abort();
548 }
549
550 mfd_assert_size(fd, mfd_def_size / 2);
551
552 fd2 = mfd_assert_open(fd,
553 O_RDWR | O_CREAT | O_TRUNC,
554 S_IRUSR | S_IWUSR);
555 close(fd2);
556
557 mfd_assert_size(fd, 0);
558 }
559
mfd_fail_shrink(int fd)560 static void mfd_fail_shrink(int fd)
561 {
562 int r;
563
564 r = ftruncate(fd, mfd_def_size / 2);
565 if (r >= 0) {
566 printf("ftruncate(SHRINK) didn't fail as expected\n");
567 abort();
568 }
569
570 mfd_fail_open(fd,
571 O_RDWR | O_CREAT | O_TRUNC,
572 S_IRUSR | S_IWUSR);
573 }
574
mfd_assert_grow(int fd)575 static void mfd_assert_grow(int fd)
576 {
577 int r;
578
579 r = ftruncate(fd, mfd_def_size * 2);
580 if (r < 0) {
581 printf("ftruncate(GROW) failed: %m\n");
582 abort();
583 }
584
585 mfd_assert_size(fd, mfd_def_size * 2);
586
587 r = fallocate(fd,
588 0,
589 0,
590 mfd_def_size * 4);
591 if (r < 0) {
592 printf("fallocate(ALLOC) failed: %m\n");
593 abort();
594 }
595
596 mfd_assert_size(fd, mfd_def_size * 4);
597 }
598
mfd_fail_grow(int fd)599 static void mfd_fail_grow(int fd)
600 {
601 int r;
602
603 r = ftruncate(fd, mfd_def_size * 2);
604 if (r >= 0) {
605 printf("ftruncate(GROW) didn't fail as expected\n");
606 abort();
607 }
608
609 r = fallocate(fd,
610 0,
611 0,
612 mfd_def_size * 4);
613 if (r >= 0) {
614 printf("fallocate(ALLOC) didn't fail as expected\n");
615 abort();
616 }
617 }
618
mfd_assert_grow_write(int fd)619 static void mfd_assert_grow_write(int fd)
620 {
621 static char *buf;
622 ssize_t l;
623
624 /* hugetlbfs does not support write */
625 if (hugetlbfs_test)
626 return;
627
628 buf = malloc(mfd_def_size * 8);
629 if (!buf) {
630 printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
631 abort();
632 }
633
634 l = pwrite(fd, buf, mfd_def_size * 8, 0);
635 if (l != (mfd_def_size * 8)) {
636 printf("pwrite() failed: %m\n");
637 abort();
638 }
639
640 mfd_assert_size(fd, mfd_def_size * 8);
641 }
642
mfd_fail_grow_write(int fd)643 static void mfd_fail_grow_write(int fd)
644 {
645 static char *buf;
646 ssize_t l;
647
648 /* hugetlbfs does not support write */
649 if (hugetlbfs_test)
650 return;
651
652 buf = malloc(mfd_def_size * 8);
653 if (!buf) {
654 printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
655 abort();
656 }
657
658 l = pwrite(fd, buf, mfd_def_size * 8, 0);
659 if (l == (mfd_def_size * 8)) {
660 printf("pwrite() didn't fail as expected\n");
661 abort();
662 }
663 }
664
/*
 * Abort the test unless the permission bits (st_mode & 07777) of @fd are
 * exactly @mode. The file name is resolved first for the error messages.
 */
static void mfd_assert_mode(int fd, int mode)
{
	struct stat st;
	char buf[PATH_MAX];

	fd2name(fd, buf, PATH_MAX);

	if (fstat(fd, &st) < 0) {
		printf("fstat(%s) failed: %m\n", buf);
		abort();
	}

	if ((st.st_mode & 07777) != mode) {
		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
		       buf, (int)st.st_mode & 07777, mode);
		abort();
	}
}
684
/*
 * Change the permission bits of @fd to @mode with fchmod() and confirm
 * the new mode via mfd_assert_mode(). Aborts the test on failure.
 */
static void mfd_assert_chmod(int fd, int mode)
{
	char buf[PATH_MAX];

	fd2name(fd, buf, PATH_MAX);

	if (fchmod(fd, mode) < 0) {
		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
		abort();
	}

	mfd_assert_mode(fd, mode);
}
699
/*
 * Verify that fchmod(@fd, @mode) is rejected and that the permission bits
 * of @fd are the same afterwards as before the attempt. Aborts the test
 * otherwise.
 */
static void mfd_fail_chmod(int fd, int mode)
{
	struct stat st;
	char buf[PATH_MAX];

	fd2name(fd, buf, PATH_MAX);

	/* remember the current mode so it can be compared afterwards */
	if (fstat(fd, &st) < 0) {
		printf("fstat(%s) failed: %m\n", buf);
		abort();
	}

	if (fchmod(fd, mode) == 0) {
		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
		       buf, mode);
		abort();
	}

	/* verify that file mode bits did not change */
	mfd_assert_mode(fd, st.st_mode & 07777);
}
722
/*
 * Entry point for the idle helper started by spawn_idle_thread(): it just
 * sits in sigwait() on SIGTERM. @arg is unused. Always returns 0.
 */
static int idle_thread_fn(void *arg)
{
	sigset_t wait_set;
	int signo;

	/* dummy waiter; SIGTERM terminates us anyway */
	sigemptyset(&wait_set);
	sigaddset(&wait_set, SIGTERM);
	sigwait(&wait_set, &signo);

	return 0;
}
735
spawn_idle_thread(unsigned int flags)736 static pid_t spawn_idle_thread(unsigned int flags)
737 {
738 uint8_t *stack;
739 pid_t pid;
740
741 stack = malloc(STACK_SIZE);
742 if (!stack) {
743 printf("malloc(STACK_SIZE) failed: %m\n");
744 abort();
745 }
746
747 pid = clone(idle_thread_fn,
748 stack + STACK_SIZE,
749 SIGCHLD | flags,
750 NULL);
751 if (pid < 0) {
752 printf("clone() failed: %m\n");
753 abort();
754 }
755
756 return pid;
757 }
758
/*
 * Stop the idle helper identified by @pid: send it SIGTERM, then wait for
 * it with waitpid(). Neither return value is checked.
 */
static void join_idle_thread(pid_t pid)
{
	kill(pid, SIGTERM);
	waitpid(pid, NULL, 0);
}
764
765 /*
766 * Test memfd_create() syscall
767 * Verify syscall-argument validation, including name checks, flag validation
768 * and more.
769 */
test_create(void)770 static void test_create(void)
771 {
772 char buf[2048];
773 int fd;
774
775 printf("%s CREATE\n", memfd_str);
776
777 /* test NULL name */
778 mfd_fail_new(NULL, 0);
779
780 /* test over-long name (not zero-terminated) */
781 memset(buf, 0xff, sizeof(buf));
782 mfd_fail_new(buf, 0);
783
784 /* test over-long zero-terminated name */
785 memset(buf, 0xff, sizeof(buf));
786 buf[sizeof(buf) - 1] = 0;
787 mfd_fail_new(buf, 0);
788
789 /* verify "" is a valid name */
790 fd = mfd_assert_new("", 0, 0);
791 close(fd);
792
793 /* verify invalid O_* open flags */
794 mfd_fail_new("", 0x0100);
795 mfd_fail_new("", ~MFD_CLOEXEC);
796 mfd_fail_new("", ~MFD_ALLOW_SEALING);
797 mfd_fail_new("", ~0);
798 mfd_fail_new("", 0x80000000U);
799
800 /* verify EXEC and NOEXEC_SEAL can't both be set */
801 mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
802
803 /* verify MFD_CLOEXEC is allowed */
804 fd = mfd_assert_new("", 0, MFD_CLOEXEC);
805 close(fd);
806
807 /* verify MFD_ALLOW_SEALING is allowed */
808 fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
809 close(fd);
810
811 /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
812 fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
813 close(fd);
814 }
815
816 /*
817 * Test basic sealing
818 * A very basic sealing test to see whether setting/retrieving seals works.
819 */
test_basic(void)820 static void test_basic(void)
821 {
822 int fd;
823
824 printf("%s BASIC\n", memfd_str);
825
826 fd = mfd_assert_new("kern_memfd_basic",
827 mfd_def_size,
828 MFD_CLOEXEC | MFD_ALLOW_SEALING);
829
830 /* add basic seals */
831 mfd_assert_has_seals(fd, 0);
832 mfd_assert_add_seals(fd, F_SEAL_SHRINK |
833 F_SEAL_WRITE);
834 mfd_assert_has_seals(fd, F_SEAL_SHRINK |
835 F_SEAL_WRITE);
836
837 /* add them again */
838 mfd_assert_add_seals(fd, F_SEAL_SHRINK |
839 F_SEAL_WRITE);
840 mfd_assert_has_seals(fd, F_SEAL_SHRINK |
841 F_SEAL_WRITE);
842
843 /* add more seals and seal against sealing */
844 mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
845 mfd_assert_has_seals(fd, F_SEAL_SHRINK |
846 F_SEAL_GROW |
847 F_SEAL_WRITE |
848 F_SEAL_SEAL);
849
850 /* verify that sealing no longer works */
851 mfd_fail_add_seals(fd, F_SEAL_GROW);
852 mfd_fail_add_seals(fd, 0);
853
854 close(fd);
855
856 /* verify sealing does not work without MFD_ALLOW_SEALING */
857 fd = mfd_assert_new("kern_memfd_basic",
858 mfd_def_size,
859 MFD_CLOEXEC);
860 mfd_assert_has_seals(fd, F_SEAL_SEAL);
861 mfd_fail_add_seals(fd, F_SEAL_SHRINK |
862 F_SEAL_GROW |
863 F_SEAL_WRITE);
864 mfd_assert_has_seals(fd, F_SEAL_SEAL);
865 close(fd);
866 }
867
868 /*
869 * Test SEAL_WRITE
870 * Test whether SEAL_WRITE actually prevents modifications.
871 */
test_seal_write(void)872 static void test_seal_write(void)
873 {
874 int fd;
875
876 printf("%s SEAL-WRITE\n", memfd_str);
877
878 fd = mfd_assert_new("kern_memfd_seal_write",
879 mfd_def_size,
880 MFD_CLOEXEC | MFD_ALLOW_SEALING);
881 mfd_assert_has_seals(fd, 0);
882 mfd_assert_add_seals(fd, F_SEAL_WRITE);
883 mfd_assert_has_seals(fd, F_SEAL_WRITE);
884
885 mfd_assert_read(fd);
886 mfd_fail_write(fd);
887 mfd_assert_shrink(fd);
888 mfd_assert_grow(fd);
889 mfd_fail_grow_write(fd);
890
891 close(fd);
892 }
893
894 /*
895 * Test SEAL_FUTURE_WRITE
896 * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
897 */
test_seal_future_write(void)898 static void test_seal_future_write(void)
899 {
900 int fd, fd2;
901 void *p;
902
903 printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
904
905 fd = mfd_assert_new("kern_memfd_seal_future_write",
906 mfd_def_size,
907 MFD_CLOEXEC | MFD_ALLOW_SEALING);
908
909 p = mfd_assert_mmap_shared(fd);
910
911 mfd_assert_has_seals(fd, 0);
912
913 mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
914 mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
915
916 /* read should pass, writes should fail */
917 mfd_assert_read(fd);
918 mfd_assert_read_shared(fd);
919 mfd_fail_write(fd);
920
921 fd2 = mfd_assert_reopen_fd(fd);
922 /* read should pass, writes should still fail */
923 mfd_assert_read(fd2);
924 mfd_assert_read_shared(fd2);
925 mfd_fail_write(fd2);
926
927 mfd_assert_fork_private_write(fd);
928
929 munmap(p, mfd_def_size);
930 close(fd2);
931 close(fd);
932 }
933
934 /*
935 * Test SEAL_SHRINK
936 * Test whether SEAL_SHRINK actually prevents shrinking
937 */
test_seal_shrink(void)938 static void test_seal_shrink(void)
939 {
940 int fd;
941
942 printf("%s SEAL-SHRINK\n", memfd_str);
943
944 fd = mfd_assert_new("kern_memfd_seal_shrink",
945 mfd_def_size,
946 MFD_CLOEXEC | MFD_ALLOW_SEALING);
947 mfd_assert_has_seals(fd, 0);
948 mfd_assert_add_seals(fd, F_SEAL_SHRINK);
949 mfd_assert_has_seals(fd, F_SEAL_SHRINK);
950
951 mfd_assert_read(fd);
952 mfd_assert_write(fd);
953 mfd_fail_shrink(fd);
954 mfd_assert_grow(fd);
955 mfd_assert_grow_write(fd);
956
957 close(fd);
958 }
959
960 /*
961 * Test SEAL_GROW
962 * Test whether SEAL_GROW actually prevents growing
963 */
test_seal_grow(void)964 static void test_seal_grow(void)
965 {
966 int fd;
967
968 printf("%s SEAL-GROW\n", memfd_str);
969
970 fd = mfd_assert_new("kern_memfd_seal_grow",
971 mfd_def_size,
972 MFD_CLOEXEC | MFD_ALLOW_SEALING);
973 mfd_assert_has_seals(fd, 0);
974 mfd_assert_add_seals(fd, F_SEAL_GROW);
975 mfd_assert_has_seals(fd, F_SEAL_GROW);
976
977 mfd_assert_read(fd);
978 mfd_assert_write(fd);
979 mfd_assert_shrink(fd);
980 mfd_fail_grow(fd);
981 mfd_fail_grow_write(fd);
982
983 close(fd);
984 }
985
986 /*
987 * Test SEAL_SHRINK | SEAL_GROW
988 * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
989 */
test_seal_resize(void)990 static void test_seal_resize(void)
991 {
992 int fd;
993
994 printf("%s SEAL-RESIZE\n", memfd_str);
995
996 fd = mfd_assert_new("kern_memfd_seal_resize",
997 mfd_def_size,
998 MFD_CLOEXEC | MFD_ALLOW_SEALING);
999 mfd_assert_has_seals(fd, 0);
1000 mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1001 mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1002
1003 mfd_assert_read(fd);
1004 mfd_assert_write(fd);
1005 mfd_fail_shrink(fd);
1006 mfd_fail_grow(fd);
1007 mfd_fail_grow_write(fd);
1008
1009 close(fd);
1010 }
1011
1012 /*
1013 * Test SEAL_EXEC
1014 * Test fd is created with exec and allow sealing.
1015 * chmod() cannot change x bits after sealing.
1016 */
test_exec_seal(void)1017 static void test_exec_seal(void)
1018 {
1019 int fd;
1020
1021 printf("%s SEAL-EXEC\n", memfd_str);
1022
1023 printf("%s Apply SEAL_EXEC\n", memfd_str);
1024 fd = mfd_assert_new("kern_memfd_seal_exec",
1025 mfd_def_size,
1026 MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1027
1028 mfd_assert_mode(fd, 0777);
1029 mfd_assert_chmod(fd, 0644);
1030
1031 mfd_assert_has_seals(fd, 0);
1032 mfd_assert_add_seals(fd, F_SEAL_EXEC);
1033 mfd_assert_has_seals(fd, F_SEAL_EXEC);
1034
1035 mfd_assert_chmod(fd, 0600);
1036 mfd_fail_chmod(fd, 0777);
1037 mfd_fail_chmod(fd, 0670);
1038 mfd_fail_chmod(fd, 0605);
1039 mfd_fail_chmod(fd, 0700);
1040 mfd_fail_chmod(fd, 0100);
1041 mfd_assert_chmod(fd, 0666);
1042 mfd_assert_write(fd);
1043 close(fd);
1044
1045 printf("%s Apply ALL_SEALS\n", memfd_str);
1046 fd = mfd_assert_new("kern_memfd_seal_exec",
1047 mfd_def_size,
1048 MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1049
1050 mfd_assert_mode(fd, 0777);
1051 mfd_assert_chmod(fd, 0700);
1052
1053 mfd_assert_has_seals(fd, 0);
1054 mfd_assert_add_seals(fd, F_SEAL_EXEC);
1055 mfd_assert_has_seals(fd, F_WX_SEALS);
1056
1057 mfd_fail_chmod(fd, 0711);
1058 mfd_fail_chmod(fd, 0600);
1059 mfd_fail_write(fd);
1060 close(fd);
1061 }
1062
1063 /*
1064 * Test EXEC_NO_SEAL
1065 * Test fd is created with exec and not allow sealing.
1066 */
test_exec_no_seal(void)1067 static void test_exec_no_seal(void)
1068 {
1069 int fd;
1070
1071 printf("%s EXEC_NO_SEAL\n", memfd_str);
1072
1073 /* Create with EXEC but without ALLOW_SEALING */
1074 fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1075 mfd_def_size,
1076 MFD_CLOEXEC | MFD_EXEC);
1077 mfd_assert_mode(fd, 0777);
1078 mfd_assert_has_seals(fd, F_SEAL_SEAL);
1079 mfd_assert_chmod(fd, 0666);
1080 close(fd);
1081 }
1082
1083 /*
1084 * Test memfd_create with MFD_NOEXEC flag
1085 */
test_noexec_seal(void)1086 static void test_noexec_seal(void)
1087 {
1088 int fd;
1089
1090 printf("%s NOEXEC_SEAL\n", memfd_str);
1091
1092 /* Create with NOEXEC and ALLOW_SEALING */
1093 fd = mfd_assert_new("kern_memfd_noexec",
1094 mfd_def_size,
1095 MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1096 mfd_assert_mode(fd, 0666);
1097 mfd_assert_has_seals(fd, F_SEAL_EXEC);
1098 mfd_fail_chmod(fd, 0777);
1099 close(fd);
1100
1101 /* Create with NOEXEC but without ALLOW_SEALING */
1102 fd = mfd_assert_new("kern_memfd_noexec",
1103 mfd_def_size,
1104 MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1105 mfd_assert_mode(fd, 0666);
1106 mfd_assert_has_seals(fd, F_SEAL_EXEC);
1107 mfd_fail_chmod(fd, 0777);
1108 close(fd);
1109 }
1110
test_sysctl_child(void)1111 static void test_sysctl_child(void)
1112 {
1113 int fd;
1114
1115 printf("%s sysctl 0\n", memfd_str);
1116 sysctl_assert_write("0");
1117 fd = mfd_assert_new("kern_memfd_sysctl_0",
1118 mfd_def_size,
1119 MFD_CLOEXEC | MFD_ALLOW_SEALING);
1120
1121 mfd_assert_mode(fd, 0777);
1122 mfd_assert_has_seals(fd, 0);
1123 mfd_assert_chmod(fd, 0644);
1124 close(fd);
1125
1126 printf("%s sysctl 1\n", memfd_str);
1127 sysctl_assert_write("1");
1128 fd = mfd_assert_new("kern_memfd_sysctl_1",
1129 mfd_def_size,
1130 MFD_CLOEXEC | MFD_ALLOW_SEALING);
1131
1132 mfd_assert_mode(fd, 0666);
1133 mfd_assert_has_seals(fd, F_SEAL_EXEC);
1134 mfd_fail_chmod(fd, 0777);
1135 sysctl_fail_write("0");
1136 close(fd);
1137
1138 printf("%s sysctl 2\n", memfd_str);
1139 sysctl_assert_write("2");
1140 mfd_fail_new("kern_memfd_sysctl_2",
1141 MFD_CLOEXEC | MFD_ALLOW_SEALING);
1142 sysctl_fail_write("0");
1143 sysctl_fail_write("1");
1144 }
1145
/*
 * clone() entry point that runs test_sysctl_child(). @arg is unused.
 * Always returns 0.
 */
static int newpid_thread_fn(void *arg)
{
	test_sysctl_child();

	return 0;
}
1151
test_sysctl_child2(void)1152 static void test_sysctl_child2(void)
1153 {
1154 int fd;
1155
1156 sysctl_fail_write("0");
1157 fd = mfd_assert_new("kern_memfd_sysctl_1",
1158 mfd_def_size,
1159 MFD_CLOEXEC | MFD_ALLOW_SEALING);
1160
1161 mfd_assert_mode(fd, 0666);
1162 mfd_assert_has_seals(fd, F_SEAL_EXEC);
1163 mfd_fail_chmod(fd, 0777);
1164 close(fd);
1165 }
1166
/*
 * clone() entry point that runs test_sysctl_child2(). @arg is unused.
 * Always returns 0.
 */
static int newpid_thread_fn2(void *arg)
{
	test_sysctl_child2();

	return 0;
}
spawn_newpid_thread(unsigned int flags,int (* fn)(void *))1172 static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *))
1173 {
1174 uint8_t *stack;
1175 pid_t pid;
1176
1177 stack = malloc(STACK_SIZE);
1178 if (!stack) {
1179 printf("malloc(STACK_SIZE) failed: %m\n");
1180 abort();
1181 }
1182
1183 pid = clone(fn,
1184 stack + STACK_SIZE,
1185 SIGCHLD | flags,
1186 NULL);
1187 if (pid < 0) {
1188 printf("clone() failed: %m\n");
1189 abort();
1190 }
1191
1192 return pid;
1193 }
1194
join_newpid_thread(pid_t pid)1195 static void join_newpid_thread(pid_t pid)
1196 {
1197 waitpid(pid, NULL, 0);
1198 }
1199
1200 /*
1201 * Test sysctl
1202 * A very basic sealing test to see whether setting/retrieving seals works.
1203 */
test_sysctl(void)1204 static void test_sysctl(void)
1205 {
1206 int pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn);
1207
1208 join_newpid_thread(pid);
1209
1210 printf("%s child ns\n", memfd_str);
1211 sysctl_assert_write("1");
1212
1213 pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn2);
1214 join_newpid_thread(pid);
1215 }
1216
1217 /*
1218 * Test sharing via dup()
1219 * Test that seals are shared between dupped FDs and they're all equal.
1220 */
test_share_dup(char * banner,char * b_suffix)1221 static void test_share_dup(char *banner, char *b_suffix)
1222 {
1223 int fd, fd2;
1224
1225 printf("%s %s %s\n", memfd_str, banner, b_suffix);
1226
1227 fd = mfd_assert_new("kern_memfd_share_dup",
1228 mfd_def_size,
1229 MFD_CLOEXEC | MFD_ALLOW_SEALING);
1230 mfd_assert_has_seals(fd, 0);
1231
1232 fd2 = mfd_assert_dup(fd);
1233 mfd_assert_has_seals(fd2, 0);
1234
1235 mfd_assert_add_seals(fd, F_SEAL_WRITE);
1236 mfd_assert_has_seals(fd, F_SEAL_WRITE);
1237 mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1238
1239 mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1240 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1241 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1242
1243 mfd_assert_add_seals(fd, F_SEAL_SEAL);
1244 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1245 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1246
1247 mfd_fail_add_seals(fd, F_SEAL_GROW);
1248 mfd_fail_add_seals(fd2, F_SEAL_GROW);
1249 mfd_fail_add_seals(fd, F_SEAL_SEAL);
1250 mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1251
1252 close(fd2);
1253
1254 mfd_fail_add_seals(fd, F_SEAL_GROW);
1255 close(fd);
1256 }
1257
1258 /*
1259 * Test sealing with active mmap()s
1260 * Modifying seals is only allowed if no other mmap() refs exist.
1261 */
test_share_mmap(char * banner,char * b_suffix)1262 static void test_share_mmap(char *banner, char *b_suffix)
1263 {
1264 int fd;
1265 void *p;
1266
1267 printf("%s %s %s\n", memfd_str, banner, b_suffix);
1268
1269 fd = mfd_assert_new("kern_memfd_share_mmap",
1270 mfd_def_size,
1271 MFD_CLOEXEC | MFD_ALLOW_SEALING);
1272 mfd_assert_has_seals(fd, 0);
1273
1274 /* shared/writable ref prevents sealing WRITE, but allows others */
1275 p = mfd_assert_mmap_shared(fd);
1276 mfd_fail_add_seals(fd, F_SEAL_WRITE);
1277 mfd_assert_has_seals(fd, 0);
1278 mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1279 mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1280 munmap(p, mfd_def_size);
1281
1282 /* readable ref allows sealing */
1283 p = mfd_assert_mmap_private(fd);
1284 mfd_assert_add_seals(fd, F_SEAL_WRITE);
1285 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1286 munmap(p, mfd_def_size);
1287
1288 close(fd);
1289 }
1290
1291 /*
1292 * Test sealing with open(/proc/self/fd/%d)
1293 * Via /proc we can get access to a separate file-context for the same memfd.
1294 * This is *not* like dup(), but like a real separate open(). Make sure the
1295 * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1296 */
test_share_open(char * banner,char * b_suffix)1297 static void test_share_open(char *banner, char *b_suffix)
1298 {
1299 int fd, fd2;
1300
1301 printf("%s %s %s\n", memfd_str, banner, b_suffix);
1302
1303 fd = mfd_assert_new("kern_memfd_share_open",
1304 mfd_def_size,
1305 MFD_CLOEXEC | MFD_ALLOW_SEALING);
1306 mfd_assert_has_seals(fd, 0);
1307
1308 fd2 = mfd_assert_open(fd, O_RDWR, 0);
1309 mfd_assert_add_seals(fd, F_SEAL_WRITE);
1310 mfd_assert_has_seals(fd, F_SEAL_WRITE);
1311 mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1312
1313 mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1314 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1315 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1316
1317 close(fd);
1318 fd = mfd_assert_open(fd2, O_RDONLY, 0);
1319
1320 mfd_fail_add_seals(fd, F_SEAL_SEAL);
1321 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1322 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1323
1324 close(fd2);
1325 fd2 = mfd_assert_open(fd, O_RDWR, 0);
1326
1327 mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1328 mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1329 mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1330
1331 close(fd2);
1332 close(fd);
1333 }
1334
1335 /*
1336 * Test sharing via fork()
1337 * Test whether seal-modifications work as expected with forked childs.
1338 */
test_share_fork(char * banner,char * b_suffix)1339 static void test_share_fork(char *banner, char *b_suffix)
1340 {
1341 int fd;
1342 pid_t pid;
1343
1344 printf("%s %s %s\n", memfd_str, banner, b_suffix);
1345
1346 fd = mfd_assert_new("kern_memfd_share_fork",
1347 mfd_def_size,
1348 MFD_CLOEXEC | MFD_ALLOW_SEALING);
1349 mfd_assert_has_seals(fd, 0);
1350
1351 pid = spawn_idle_thread(0);
1352 mfd_assert_add_seals(fd, F_SEAL_SEAL);
1353 mfd_assert_has_seals(fd, F_SEAL_SEAL);
1354
1355 mfd_fail_add_seals(fd, F_SEAL_WRITE);
1356 mfd_assert_has_seals(fd, F_SEAL_SEAL);
1357
1358 join_idle_thread(pid);
1359
1360 mfd_fail_add_seals(fd, F_SEAL_WRITE);
1361 mfd_assert_has_seals(fd, F_SEAL_SEAL);
1362
1363 close(fd);
1364 }
1365
main(int argc,char ** argv)1366 int main(int argc, char **argv)
1367 {
1368 pid_t pid;
1369
1370 if (argc == 2) {
1371 if (!strcmp(argv[1], "hugetlbfs")) {
1372 unsigned long hpage_size = default_huge_page_size();
1373
1374 if (!hpage_size) {
1375 printf("Unable to determine huge page size\n");
1376 abort();
1377 }
1378
1379 hugetlbfs_test = 1;
1380 memfd_str = MEMFD_HUGE_STR;
1381 mfd_def_size = hpage_size * 2;
1382 } else {
1383 printf("Unknown option: %s\n", argv[1]);
1384 abort();
1385 }
1386 }
1387
1388 test_create();
1389 test_basic();
1390 test_exec_seal();
1391 test_exec_no_seal();
1392 test_noexec_seal();
1393
1394 test_seal_write();
1395 test_seal_future_write();
1396 test_seal_shrink();
1397 test_seal_grow();
1398 test_seal_resize();
1399
1400 test_share_dup("SHARE-DUP", "");
1401 test_share_mmap("SHARE-MMAP", "");
1402 test_share_open("SHARE-OPEN", "");
1403 test_share_fork("SHARE-FORK", "");
1404
1405 /* Run test-suite in a multi-threaded environment with a shared
1406 * file-table. */
1407 pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1408 test_share_dup("SHARE-DUP", SHARED_FT_STR);
1409 test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1410 test_share_open("SHARE-OPEN", SHARED_FT_STR);
1411 test_share_fork("SHARE-FORK", SHARED_FT_STR);
1412 join_idle_thread(pid);
1413
1414 test_sysctl();
1415
1416 printf("memfd: DONE\n");
1417
1418 return 0;
1419 }
1420