1 // Copyright 2016 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <assert.h>
6 #include <dirent.h>
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <limits.h>
10 #include <poll.h>
11 #include <stdarg.h>
12 #include <stdbool.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <sys/ioctl.h>
16 #include <sys/mman.h>
17 #include <sys/select.h>
18 #include <sys/stat.h>
19 #include <sys/statfs.h>
20 #include <sys/uio.h>
21 #include <utime.h>
22 #include <threads.h>
23 #include <unistd.h>
24 
25 #include <fuchsia/io/c/fidl.h>
26 #include <zircon/assert.h>
27 #include <zircon/compiler.h>
28 #include <zircon/device/vfs.h>
29 #include <zircon/process.h>
30 #include <zircon/processargs.h>
31 #include <zircon/syscalls.h>
32 #include <zircon/time.h>
33 
34 #include <fuchsia/io/c/fidl.h>
35 #include <lib/fdio/debug.h>
36 #include <lib/fdio/io.h>
37 #include <lib/fdio/namespace.h>
38 #include <lib/fdio/private.h>
39 #include <lib/fdio/unsafe.h>
40 #include <lib/fdio/util.h>
41 #include <lib/fdio/vfs.h>
42 
43 #include "private.h"
44 #include "unistd.h"
45 
46 static_assert(IOFLAG_CLOEXEC == FD_CLOEXEC, "Unexpected fdio flags value");
47 
48 // non-thread-safe emulation of unistd io functions
49 // using the fdio transports
50 
// Global fdio state: the fdtab lock, the cwd lock/path, and the init flag.
// Initialized statically so it is valid before any constructor runs; the cwd
// defaults to "/" until update_cwd_path() refines it during startup.
fdio_state_t __fdio_global_state = {
    .lock = MTX_INIT,
    .cwd_lock = MTX_INIT,
    .init = true,
    .cwd_path = "/",
};
57 
// fdio_reserved_io is a globally shared fdio_t that is used to represent a
// reservation in the fdtab. If a user observes fdio_reserved_io there is a race
// condition in their code or they are looking up fd's by number.
// fdio_reserved_io is used in the time between a user requesting an operation
// that creates and fd, and the time when a remote operation to create the
// backing fdio_t is created, without holding the fdtab lock. Examples include
// open() of a file, or accept() on a socket.
static fdio_t fdio_reserved_io = {
    // TODO(raggi): It may be ideal to replace these operations with ones that
    // more directly encode the result that a user must have implemented a race
    // in order to invoke them.
    // NOTE: .ops is deliberately NULL -- any dispatch through this sentinel
    // faults immediately, which is the desired behavior for racing callers.
    .ops = NULL,
    .magic = FDIO_MAGIC,
    .refcount = 1,
    .dupcount = 1,
    .ioflag = 0,
};
75 
76 
fdio_is_reserved_or_null(fdio_t * io)77 static bool fdio_is_reserved_or_null(fdio_t *io) {
78     if (io == NULL || io == &fdio_reserved_io) {
79         return true;
80     }
81     return false;
82 }
83 
fdio_reserve_fd(int starting_fd)84 int fdio_reserve_fd(int starting_fd) {
85     if ((starting_fd < 0) || (starting_fd >= FDIO_MAX_FD)) {
86         errno = EINVAL;
87         return -1;
88     }
89     mtx_lock(&fdio_lock);
90     for (int fd = starting_fd; fd < FDIO_MAX_FD; fd++) {
91         if (fdio_fdtab[fd] == NULL) {
92             fdio_fdtab[fd] = &fdio_reserved_io;
93             mtx_unlock(&fdio_lock);
94             return fd;
95         }
96     }
97     mtx_unlock(&fdio_lock);
98     errno = EMFILE;
99     return -1;
100 }
101 
// Replaces the reservation at |fd| (made by fdio_reserve_fd) with the live
// |io|, taking a dupcount reference on behalf of the fdtab slot.
// Returns |fd| on success, or -1 with errno = EINVAL if |fd| is out of range,
// |io| is NULL, or the slot does not currently hold the reservation sentinel.
int fdio_assign_reserved(int fd, fdio_t *io) {
    // Validate before touching fdio_fdtab: the previous code indexed the
    // table with an unchecked |fd|, which is out-of-bounds for negative or
    // too-large values. Mirrors the check in fdio_release_reserved().
    if ((fd < 0) || (fd >= FDIO_MAX_FD) || (io == NULL)) {
        errno = EINVAL;
        return -1;
    }
    mtx_lock(&fdio_lock);
    fdio_t *res = fdio_fdtab[fd];
    if (res != &fdio_reserved_io) {
        // Slot was never reserved, or the reservation was already consumed.
        mtx_unlock(&fdio_lock);
        errno = EINVAL;
        return -1;
    }
    io->dupcount++;
    fdio_fdtab[fd] = io;
    mtx_unlock(&fdio_lock);
    return fd;
}
115 
fdio_release_reserved(int fd)116 int fdio_release_reserved(int fd) {
117     if ((fd < 0) || (fd >= FDIO_MAX_FD)) {
118         errno = EINVAL;
119         return -1;
120     }
121     mtx_lock(&fdio_lock);
122     fdio_t *res = fdio_fdtab[fd];
123     if (res != &fdio_reserved_io) {
124         mtx_unlock(&fdio_lock);
125         errno = EINVAL;
126         return -1;
127     }
128     fdio_fdtab[fd] = NULL;
129     mtx_unlock(&fdio_lock);
130     return fd;
131 }
132 
// Attaches an fdio to an fdtab slot.
// The fdio must have been upref'd on behalf of the
// fdtab prior to binding.
//
// If |fd| is negative, the first free slot at or after |starting_fd| is used
// (errno = EMFILE if none; EINVAL if |starting_fd| is also negative).
// Otherwise |fd| names the slot directly (errno = EINVAL if out of range),
// and any fdio previously bound there is evicted, dup2-style.
// Returns the bound fd, or -1 with errno set.
__EXPORT
int fdio_bind_to_fd(fdio_t* io, int fd, int starting_fd) {
    fdio_t* io_to_close = NULL;

    mtx_lock(&fdio_lock);
    LOG(1, "fdio: bind_to_fd(%p, %d, %d)\n", io, fd, starting_fd);
    if (fd < 0) {
        // If we are not given an |fd|, the |starting_fd| must be non-negative.
        if (starting_fd < 0) {
            errno = EINVAL;
            mtx_unlock(&fdio_lock);
            return -1;
        }

        // A negative fd implies that any free fd value can be used
        //TODO: bitmap, ffs, etc
        for (fd = starting_fd; fd < FDIO_MAX_FD; fd++) {
            if (fdio_fdtab[fd] == NULL) {
                goto free_fd_found;
            }
        }
        errno = EMFILE;
        mtx_unlock(&fdio_lock);
        return -1;
    } else if (fd >= FDIO_MAX_FD) {
        errno = EINVAL;
        mtx_unlock(&fdio_lock);
        return -1;
    } else {
        // Explicit target slot: drop the table's dupcount reference on the
        // current occupant, if any.
        io_to_close = fdio_fdtab[fd];
        if (io_to_close) {
            io_to_close->dupcount--;
            LOG(1, "fdio: bind_to_fd: closed fd=%d, io=%p, dupcount=%d\n",
                fd, io_to_close, io_to_close->dupcount);
            if (io_to_close->dupcount > 0) {
                // still alive in another fdtab slot
                fdio_release(io_to_close);
                io_to_close = NULL;
            }
        }
    }

free_fd_found:
    LOG(1, "fdio: bind_to_fd() OK fd=%d\n", fd);
    io->dupcount++;
    fdio_fdtab[fd] = io;
    mtx_unlock(&fdio_lock);

    // Close/release the evicted fdio outside the lock to avoid holding the
    // fdtab lock across a potentially blocking transport close.
    if (io_to_close) {
        io_to_close->ops->close(io_to_close);
        fdio_release(io_to_close);
    }
    return fd;
}
190 
191 // If a fdio_t exists for this fd and it has not been dup'd
192 // and is not in active use (an io operation underway, etc),
193 // detach it from the fdtab and return it with a single
194 // refcount.
195 __EXPORT
fdio_unbind_from_fd(int fd,fdio_t ** out)196 zx_status_t fdio_unbind_from_fd(int fd, fdio_t** out) {
197     zx_status_t status;
198     mtx_lock(&fdio_lock);
199     LOG(1, "fdio: unbind_from_fd(%d)\n", fd);
200     if (fd >= FDIO_MAX_FD) {
201         status = ZX_ERR_INVALID_ARGS;
202         goto done;
203     }
204     fdio_t* io = fdio_fdtab[fd];
205     if (fdio_is_reserved_or_null(io)) {
206         status = ZX_ERR_INVALID_ARGS;
207         goto done;
208     }
209     if (io->dupcount > 1) {
210         status = ZX_ERR_UNAVAILABLE;
211         goto done;
212     }
213     if (atomic_load(&io->refcount) > 1) {
214         status = ZX_ERR_UNAVAILABLE;
215         goto done;
216     }
217     io->dupcount = 0;
218     fdio_fdtab[fd] = NULL;
219     *out = io;
220     status = ZX_OK;
221 done:
222     mtx_unlock(&fdio_lock);
223     return status;
224 }
225 
226 __EXPORT
fdio_unsafe_fd_to_io(int fd)227 fdio_t* fdio_unsafe_fd_to_io(int fd) {
228     if ((fd < 0) || (fd >= FDIO_MAX_FD)) {
229         return NULL;
230     }
231     fdio_t* io = NULL;
232     mtx_lock(&fdio_lock);
233     io = fdio_fdtab[fd];
234     if (fdio_is_reserved_or_null(io)) {
235         // Never hand back the reserved io as it does not have an ops table.
236         io = NULL;
237     } else {
238         fdio_acquire(io);
239     }
240     mtx_unlock(&fdio_lock);
241     return io;
242 }
243 
// Closes the transport backing |io|. Does not touch the fdtab; callers pair
// this with fdio_release(). A nonzero dupcount here means the io is still
// bound to some fd slot, which is logged as a diagnostic.
zx_status_t fdio_close(fdio_t* io) {
    if (io->dupcount > 0) {
        LOG(1, "fdio: close(%p): nonzero dupcount!\n", io);
    }
    LOG(1, "fdio: io: close(%p)\n", io);
    return io->ops->close(io);
}
251 
252 // Verify the O_* flags which align with ZXIO_FS_*.
253 static_assert(O_PATH == ZX_FS_FLAG_VNODE_REF_ONLY, "Open Flag mismatch");
254 static_assert(O_ADMIN == ZX_FS_RIGHT_ADMIN, "Open Flag mismatch");
255 static_assert(O_CREAT == ZX_FS_FLAG_CREATE, "Open Flag mismatch");
256 static_assert(O_EXCL == ZX_FS_FLAG_EXCLUSIVE, "Open Flag mismatch");
257 static_assert(O_TRUNC == ZX_FS_FLAG_TRUNCATE, "Open Flag mismatch");
258 static_assert(O_DIRECTORY == ZX_FS_FLAG_DIRECTORY, "Open Flag mismatch");
259 static_assert(O_APPEND == ZX_FS_FLAG_APPEND, "Open Flag mismatch");
260 static_assert(O_NOREMOTE == ZX_FS_FLAG_NOREMOTE, "Open Flag mismatch");
261 
262 // The mask of "1:1" flags which match between both open flag representations.
263 #define ZXIO_FS_MASK (O_PATH | O_ADMIN | O_CREAT | O_EXCL | O_TRUNC | \
264                       O_DIRECTORY | O_APPEND | O_NOREMOTE)
265 
266 // Verify that the remaining O_* flags don't overlap with the ZXIO mask.
267 static_assert(!(O_RDONLY & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
268 static_assert(!(O_WRONLY & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
269 static_assert(!(O_RDWR & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
270 static_assert(!(O_NONBLOCK & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
271 static_assert(!(O_DSYNC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
272 static_assert(!(O_SYNC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
273 static_assert(!(O_RSYNC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
274 static_assert(!(O_NOFOLLOW & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
275 static_assert(!(O_CLOEXEC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
276 static_assert(!(O_NOCTTY & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
277 static_assert(!(O_ASYNC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
278 static_assert(!(O_DIRECT & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
279 static_assert(!(O_LARGEFILE & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
280 static_assert(!(O_NOATIME & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
281 static_assert(!(O_TMPFILE & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
282 static_assert(!(O_PIPELINE & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
283 
// Translates POSIX open() flags into ZX_FS_* flags for the fuchsia.io
// protocol. The access mode maps to readable/writable rights; flags covered
// by ZXIO_FS_MASK pass through unchanged. Unless O_PIPELINE was requested,
// ZX_FS_FLAG_DESCRIBE is added so the open reports its result.
static uint32_t fdio_flags_to_zxio(uint32_t flags) {
    uint32_t result = flags & ZXIO_FS_MASK;
    const uint32_t accmode = flags & O_ACCMODE;
    if (accmode == O_RDWR) {
        result |= ZX_FS_RIGHT_READABLE | ZX_FS_RIGHT_WRITABLE;
    } else if (accmode == O_WRONLY) {
        result |= ZX_FS_RIGHT_WRITABLE;
    } else if (accmode == O_RDONLY) {
        result |= ZX_FS_RIGHT_READABLE;
    }
    if ((flags & O_PIPELINE) == 0) {
        result |= ZX_FS_FLAG_DESCRIBE;
    }
    return result;
}
305 
// Translates ZX_FS_* flags back into POSIX open() flags: rights become the
// access mode, and ZXIO_FS_MASK-covered bits pass through unchanged.
static uint32_t zxio_flags_to_fdio(uint32_t flags) {
    const uint32_t rights = flags & (ZX_FS_RIGHT_READABLE | ZX_FS_RIGHT_WRITABLE);
    uint32_t result;
    if (rights == (ZX_FS_RIGHT_READABLE | ZX_FS_RIGHT_WRITABLE)) {
        result = O_RDWR;
    } else if (rights == ZX_FS_RIGHT_WRITABLE) {
        result = O_WRONLY;
    } else {
        result = O_RDONLY;
    }
    return result | (flags & ZXIO_FS_MASK);
}
320 
321 
// Possibly return an owned fdio_t corresponding to either the root,
// the cwd, or, for the ...at variants, dirfd. In the absolute path
// case, *path is also adjusted.
//
// Returns NULL when |dirfd| names no valid directory handle. The returned
// fdio_t has been acquired and must be released by the caller.
static fdio_t* fdio_iodir(const char** path, int dirfd) {
    fdio_t* iodir = NULL;
    mtx_lock(&fdio_lock);
    if (*path[0] == '/') {
        iodir = fdio_root_handle;
        // Since we are sending a request to the root handle, the
        // rest of the path should be canonicalized as a relative
        // path (relative to this root handle).
        while (*path[0] == '/') {
            (*path)++;
            if (*path[0] == 0) {
                // A path of "/" (or all slashes) becomes "." at the root.
                *path = ".";
            }
        }
    } else if (dirfd == AT_FDCWD) {
        iodir = fdio_cwd_handle;
    } else if ((dirfd >= 0) && (dirfd < FDIO_MAX_FD)) {
        iodir = fdio_fdtab[dirfd];
    }
    if (iodir != NULL) {
        // Acquire while the lock is held so the entry cannot be released
        // out from under us by a concurrent close().
        fdio_acquire(iodir);
    }
    mtx_unlock(&fdio_lock);
    return iodir;
}
350 
351 #define IS_SEPARATOR(c) ((c) == '/' || (c) == 0)
352 
353 // Checks that if we increment this index forward, we'll
354 // still have enough space for a null terminator within
355 // PATH_MAX bytes.
356 #define CHECK_CAN_INCREMENT(i)           \
357     if (unlikely((i) + 1 >= PATH_MAX)) { \
358         return ZX_ERR_BAD_PATH;          \
359     }
360 
// Cleans an input path, transforming it to out, according to the
// rules defined by "Lexical File Names in Plan 9 or Getting Dot-Dot Right",
// accessible at: https://9p.io/sys/doc/lexnames.html
//
// Code heavily inspired by Go's filepath.Clean function, from:
// https://golang.org/src/path/filepath/path.go
//
// out is expected to be PATH_MAX bytes long.
// Sets is_dir to 'true' if the path is a directory, and 'false' otherwise.
// On success, *outlen is the cleaned length NOT counting the terminating NUL.
// Returns ZX_ERR_BAD_PATH if the cleaned path would exceed PATH_MAX.
__EXPORT
zx_status_t __fdio_cleanpath(const char* in, char* out, size_t* outlen, bool* is_dir) {
    // An empty input cleans to "." (the current directory).
    if (in[0] == 0) {
        strcpy(out, ".");
        *outlen = 1;
        *is_dir = true;
        return ZX_OK;
    }

    bool rooted = (in[0] == '/');
    size_t in_index = 0; // Index of the next byte to read
    size_t out_index = 0; // Index of the next byte to write

    if (rooted) {
        out[out_index++] = '/';
        in_index++;
        *is_dir = true;
    }
    size_t dotdot = out_index; // The output index at which '..' cannot be cleaned further.

    while (in[in_index] != 0) {
        // Assume the element ends the path as a directory; the "normal
        // component" branch below resets this to false.
        *is_dir = true;
        if (in[in_index] == '/') {
            // 1. Reduce multiple slashes to a single slash
            CHECK_CAN_INCREMENT(in_index);
            in_index++;
        } else if (in[in_index] == '.' && IS_SEPARATOR(in[in_index + 1])) {
            // 2. Eliminate . path name elements (the current directory)
            CHECK_CAN_INCREMENT(in_index);
            in_index++;
        } else if (in[in_index] == '.' && in[in_index + 1] == '.' &&
                   IS_SEPARATOR(in[in_index + 2])) {
            CHECK_CAN_INCREMENT(in_index + 1);
            in_index += 2;
            if (out_index > dotdot) {
                // 3. Eliminate .. path elements (the parent directory) and the element that
                // precedes them.
                out_index--;
                while (out_index > dotdot && out[out_index] != '/') { out_index--; }
            } else if (rooted) {
                // 4. Eliminate .. elements that begin a rooted path, that is, replace /.. by / at
                // the beginning of a path.
                continue;
            } else if (!rooted) {
                if (out_index > 0) {
                    out[out_index++] = '/';
                }
                // 5. Leave intact .. elements that begin a non-rooted path.
                out[out_index++] = '.';
                out[out_index++] = '.';
                dotdot = out_index;
            }
        } else {
            *is_dir = false;
            if ((rooted && out_index != 1) || (!rooted && out_index != 0)) {
                // Add '/' before normal path component, for non-root components.
                out[out_index++] = '/';
            }

            // Copy the component verbatim up to the next separator.
            while (!IS_SEPARATOR(in[in_index])) {
                CHECK_CAN_INCREMENT(in_index);
                out[out_index++] = in[in_index++];
            }
        }
    }

    // Everything cancelled out (e.g. "a/.."): the result is ".".
    if (out_index == 0) {
        strcpy(out, ".");
        *outlen = 1;
        *is_dir = true;
        return ZX_OK;
    }

    // Append null character
    *outlen = out_index;
    out[out_index++] = 0;
    return ZX_OK;
}
448 
__fdio_open_at(fdio_t ** io,int dirfd,const char * path,int flags,uint32_t mode)449 zx_status_t __fdio_open_at(fdio_t** io, int dirfd, const char* path, int flags, uint32_t mode) {
450     if (path == NULL) {
451         return ZX_ERR_INVALID_ARGS;
452     }
453     if (path[0] == 0) {
454         return ZX_ERR_NOT_FOUND;
455     }
456     fdio_t* iodir = fdio_iodir(&path, dirfd);
457     if (iodir == NULL) {
458         return ZX_ERR_BAD_HANDLE;
459     }
460 
461     char clean[PATH_MAX];
462     size_t outlen;
463     bool is_dir;
464     zx_status_t status = __fdio_cleanpath(path, clean, &outlen, &is_dir);
465     if (status != ZX_OK) {
466         return status;
467     }
468     flags |= (is_dir ? O_DIRECTORY : 0);
469 
470     status = iodir->ops->open(iodir, clean, fdio_flags_to_zxio(flags), mode, io);
471     fdio_release(iodir);
472     return status;
473 }
474 
// Opens |path| relative to the current working directory (AT_FDCWD).
zx_status_t __fdio_open(fdio_t** io, const char* path, int flags, uint32_t mode) {
    return __fdio_open_at(io, AT_FDCWD, path, flags, mode);
}
478 
// Applies |path| to the global fdio_cwd_path, segment by segment, normalizing
// ".", "..", and repeated slashes as it goes. An absolute |path| resets the
// cwd to "/" first and is then parsed relative to it. On internal
// inconsistency or overflow, the cwd becomes the sentinel "(unknown)".
// NOTE(review): callers appear responsible for holding the cwd lock where
// required; __libc_extensions_init calls this pre-threading -- confirm.
static void update_cwd_path(const char* path) {
    if (path[0] == '/') {
        // it's "absolute", but we'll still parse it as relative (from /)
        // so that we normalize the path (resolving, ., .., //, etc)
        fdio_cwd_path[0] = '/';
        fdio_cwd_path[1] = 0;
        path++;
    }

    size_t seglen;
    const char* next;
    // Walk |path| one '/'-delimited segment at a time.
    for (; path[0]; path = next) {
        next = strchr(path, '/');
        if (next == NULL) {
            seglen = strlen(path);
            next = path + seglen;
        } else {
            seglen = next - path;
            next++;
        }
        if (seglen == 0) {
            // empty segment, skip
            continue;
        }
        if ((seglen == 1) && (path[0] == '.')) {
            // no-change segment, skip
            continue;
        }
        if ((seglen == 2) && (path[0] == '.') && (path[1] == '.')) {
            // parent directory, remove the trailing path segment from cwd_path
            char* x = strrchr(fdio_cwd_path, '/');
            if (x == NULL) {
                // shouldn't ever happen
                goto wat;
            }
            // remove the current trailing path segment from cwd
            if (x == fdio_cwd_path) {
                // but never remove the first /
                fdio_cwd_path[1] = 0;
            } else {
                x[0] = 0;
            }
            continue;
        }
        // regular path segment, append to cwd_path
        size_t len = strlen(fdio_cwd_path);
        if ((len + seglen + 2) >= PATH_MAX) {
            // doesn't fit, shouldn't happen, but...
            goto wat;
        }
        if (len != 1) {
            // if len is 1, path is "/", so don't append a '/'
            fdio_cwd_path[len++] = '/';
        }
        memcpy(fdio_cwd_path + len, path, seglen);
        fdio_cwd_path[len + seglen] = 0;
    }
    return;

wat:
    // Unrecoverable: mark the cwd as unknown rather than leave it corrupt.
    strcpy(fdio_cwd_path, "(unknown)");
    return;
}
542 
// Opens the directory containing path
//
// Returns the non-directory portion of the path in 'out', which
// must be a buffer that can fit [NAME_MAX + 1] characters.
//
// If the cleaned path ends in a directory component, a trailing '/' is
// appended to the returned name so the server can tell it apart.
static zx_status_t __fdio_opendir_containing_at(fdio_t** io, int dirfd, const char* path,
                                                char* out) {
    if (path == NULL) {
        return ZX_ERR_INVALID_ARGS;
    }

    fdio_t* iodir = fdio_iodir(&path, dirfd);
    if (iodir == NULL) {
        return ZX_ERR_BAD_HANDLE;
    }

    char clean[PATH_MAX];
    size_t pathlen;
    bool is_dir;
    zx_status_t status = __fdio_cleanpath(path, clean, &pathlen, &is_dir);
    if (status != ZX_OK) {
        fdio_release(iodir);
        return status;
    }

    // Find the last '/'; copy everything after it.
    // Split |clean| into directory + name by NUL-terminating at the last '/'.
    size_t i = 0;
    for (i = pathlen - 1; i > 0; i--) {
        if (clean[i] == '/') {
            clean[i] = 0;
            i++;
            break;
        }
    }

    // clean[i] is now the start of the name
    size_t namelen = pathlen - i;
    if (namelen + (is_dir ? 1 : 0) > NAME_MAX) {
        fdio_release(iodir);
        return ZX_ERR_BAD_PATH;
    }

    // Copy the trailing 'name' to out.
    memcpy(out, clean + i, namelen);
    if (is_dir) {
        // TODO(smklein): Propagate this information without using
        // the output name; it'll simplify server-side path parsing
        // if all trailing slashes are replaced with "O_DIRECTORY".
        out[namelen++] = '/';
    }
    out[namelen] = 0;

    // No '/' was found and the path isn't rooted: the containing
    // directory is the current directory, ".".
    if (i == 0 && clean[i] != '/') {
        clean[0] = '.';
        clean[1] = 0;
    }

    zx_status_t r = iodir->ops->open(iodir, clean,
                                     fdio_flags_to_zxio(O_RDONLY | O_DIRECTORY), 0, io);
    fdio_release(iodir);
    return r;
}
604 
// 'name' must be a user-provided buffer, at least NAME_MAX + 1 bytes long.
// Convenience wrapper resolving |path| relative to the cwd (AT_FDCWD).
static zx_status_t __fdio_opendir_containing(fdio_t** io, const char* path, char* name) {
    return __fdio_opendir_containing_at(io, AT_FDCWD, path, name);
}
609 
// hook into libc process startup
// this is called prior to main to set up the fdio world
// and thus does not use the fdio_lock
//
// Consumes PA_FDIO_* startup handles into fdtab slots, binds PA_NS_DIR
// handles into the root namespace, seeds the cwd from $PWD, and ensures
// fds 0/1/2 and the root/cwd handles are always populated.
__EXPORT
void __libc_extensions_init(uint32_t handle_count,
                            zx_handle_t handle[],
                            uint32_t handle_info[],
                            uint32_t name_count,
                            char** names) {

#ifdef FDIO_LLDEBUG
    const char* fdiodebug = getenv("FDIODEBUG");
    if (fdiodebug) {
        fdio_set_debug_level(strtoul(fdiodebug, NULL, 10));
        LOG(1, "fdio: init: debuglevel = %s\n", fdiodebug);
    } else {
        LOG(1, "fdio: init()\n");
    }
#endif

    // fd flagged FDIO_FLAG_USE_FOR_STDIO, if any; used to fill 0/1/2 below.
    int stdio_fd = -1;

    // extract handles we care about
    for (uint32_t n = 0; n < handle_count; n++) {
        unsigned arg = PA_HND_ARG(handle_info[n]);
        zx_handle_t h = handle[n];

        // precalculate the fd from |arg|, for FDIO cases to use.
        unsigned arg_fd = arg & (~FDIO_FLAG_USE_FOR_STDIO);

        switch (PA_HND_TYPE(handle_info[n])) {
        case PA_FDIO_REMOTE: {
            // remote objects may have a second handle
            // which is for signaling events
            zx_handle_t event = ZX_HANDLE_INVALID;
            if (((n + 1) < handle_count) &&
                (handle_info[n] == handle_info[n + 1])) {
                // TODO: Remove this case once all clients migrate to providing
                // a single handle for PA_FDIO_REMOTE.
                event = handle[n + 1];
                handle_info[n + 1] = ZX_HANDLE_INVALID;
                fdio_fdtab[arg_fd] = fdio_remote_create(h, event);
                fdio_fdtab[arg_fd]->dupcount++;
                LOG(1, "fdio: inherit fd=%d (channel)\n", arg_fd);
            } else {
                fdio_t* io = NULL;
                zx_status_t status = fdio_from_channel(h, &io);
                if (status != ZX_OK) {
                    LOG(1, "fdio: Failed to acquire for fd=%d (channel) status=%d (%s)\n",
                        arg_fd, status, zx_status_get_string(status));
                    zx_handle_close(h);
                    continue;
                }
                fdio_fdtab[arg_fd] = io;
                fdio_fdtab[arg_fd]->dupcount++;
                LOG(1, "fdio: inherit fd=%d (channel)\n", arg_fd);
            }
            break;
        }
        case PA_FDIO_SOCKET: {
            fdio_t* io = NULL;
            zx_status_t status = fdio_from_socket(h, &io);
            if (status != ZX_OK) {
                LOG(1, "fdio: Failed to acquire for fd=%d (socket) status=%d (%s)\n",
                    arg_fd, status, zx_status_get_string(status));
                zx_handle_close(h);
                continue;
            }
            fdio_fdtab[arg_fd] = io;
            fdio_fdtab[arg_fd]->dupcount++;
            LOG(1, "fdio: inherit fd=%d (socket)\n", arg_fd);
            break;
        }
        case PA_FDIO_LOGGER:
            fdio_fdtab[arg_fd] = fdio_logger_create(h);
            fdio_fdtab[arg_fd]->dupcount++;
            LOG(1, "fdio: inherit fd=%d (log)\n", arg_fd);
            break;
        case PA_NS_DIR:
            // we always continue here to not steal the
            // handles from higher level code that may
            // also need access to the namespace
            if (arg >= name_count) {
                continue;
            }
            if (fdio_root_ns == NULL) {
                if (fdio_ns_create(&fdio_root_ns) < 0) {
                    continue;
                }
            }
            fdio_ns_bind(fdio_root_ns, names[arg], h);
            continue;
        default:
            // unknown handle, leave it alone
            continue;
        }
        // Handle consumed: zero the slots so later stages don't reuse it.
        handle[n] = 0;
        handle_info[n] = 0;

        // If we reach here then the handle is a PA_FDIO_* type (an fd), so
        // check for a bit flag indicating that it should be duped into 0/1/2 to
        // become all of stdin/out/err
        if ((arg & FDIO_FLAG_USE_FOR_STDIO) && (arg_fd < FDIO_MAX_FD)) {
          stdio_fd = arg_fd;
        }
    }

    const char* cwd = getenv("PWD");
    cwd = (cwd == NULL) ? "/" : cwd;

    update_cwd_path(cwd);

    fdio_t* use_for_stdio = (stdio_fd >= 0) ? fdio_fdtab[stdio_fd] : NULL;

    // configure stdin/out/err if not init'd
    for (uint32_t n = 0; n < 3; n++) {
        if (fdio_fdtab[n] == NULL) {
            if (use_for_stdio) {
                fdio_acquire(use_for_stdio);
                fdio_fdtab[n] = use_for_stdio;
            } else {
                // No stdio handle was provided: fall back to a null io.
                fdio_fdtab[n] = fdio_null_create();
            }
            fdio_fdtab[n]->dupcount++;
            LOG(1, "fdio: inherit fd=%u (dup of fd=%d)\n", n, stdio_fd);
        }
    }

    if (fdio_root_ns) {
        ZX_ASSERT(!fdio_root_handle);
        fdio_root_handle = fdio_ns_open_root(fdio_root_ns);
    }
    if (fdio_root_handle) {
        fdio_root_init = true;
        __fdio_open(&fdio_cwd_handle, fdio_cwd_path, O_RDONLY | O_DIRECTORY, 0);
    } else {
        // placeholder null handle
        fdio_root_handle = fdio_null_create();
    }
    if (fdio_cwd_handle == NULL) {
        fdio_cwd_handle = fdio_null_create();
    }
}
753 
// Clean up during process teardown. This runs after atexit hooks in
// libc. It continues to hold the fdio lock until process exit, to
// prevent other threads from racing on file descriptors.
__EXPORT
void __libc_extensions_fini(void) __TA_ACQUIRE(&fdio_lock) {
    // Intentionally no matching unlock (see above); __TA_ACQUIRE documents
    // this for the thread-safety analysis.
    mtx_lock(&fdio_lock);
    for (int fd = 0; fd < FDIO_MAX_FD; fd++) {
        fdio_t* io = fdio_fdtab[fd];
        if (io) {
            fdio_fdtab[fd] = NULL;
            io->dupcount--;
            if (io->dupcount == 0) {
                // Last fd slot referencing this io: close the transport and
                // drop the table's reference.
                io->ops->close(io);
                fdio_release(io);
            }
        }
    }
}
772 
// Installs |ns| as the process-wide root namespace, replacing the root
// handle with one opened from |ns|. Fails with ZX_ERR_ALREADY_EXISTS if a
// namespace is already installed, and ZX_ERR_IO if the root cannot be opened.
__EXPORT
zx_status_t fdio_ns_install(fdio_ns_t* ns) {
    fdio_t* io = fdio_ns_open_root(ns);
    if (io == NULL) {
        return ZX_ERR_IO;
    }

    fdio_t* old_root = NULL;
    zx_status_t status;

    mtx_lock(&fdio_lock);
    if (fdio_root_ns != NULL) {
        //TODO: support replacing an active namespace
        status = ZX_ERR_ALREADY_EXISTS;
    } else {
        fdio_root_ns = ns;
        if (fdio_root_handle) {
            old_root = fdio_root_handle;
        }
        fdio_root_handle = io;
        status = ZX_OK;
    }
    mtx_unlock(&fdio_lock);

    // Tear down the displaced root outside the lock.
    if (old_root) {
        fdio_close(old_root);
        fdio_release(old_root);
    }
    return status;
}
803 
804 __EXPORT
fdio_ns_get_installed(fdio_ns_t ** ns)805 zx_status_t fdio_ns_get_installed(fdio_ns_t** ns) {
806     zx_status_t status = ZX_OK;
807     mtx_lock(&fdio_lock);
808     if (fdio_root_ns == NULL) {
809         status = ZX_ERR_NOT_FOUND;
810     } else {
811         *ns = fdio_root_ns;
812     }
813     mtx_unlock(&fdio_lock);
814     return status;
815 }
816 
// Clones the handle(s) backing the current working directory into
// |handles|/|types| for transfer to another process.
__EXPORT
zx_status_t fdio_clone_cwd(zx_handle_t* handles, uint32_t* types) {
    // NOTE(review): reads fdio_cwd_handle without fdio_lock -- presumably
    // safe because the cwd handle is installed at startup and never torn
    // down; confirm against chdir() implementation.
    return fdio_cwd_handle->ops->clone(fdio_cwd_handle, handles, types);
}
821 
822 __EXPORT
fdio_clone_fd(int fd,int newfd,zx_handle_t * handles,uint32_t * types)823 zx_status_t fdio_clone_fd(int fd, int newfd, zx_handle_t* handles, uint32_t* types) {
824     zx_status_t r;
825     fdio_t* io;
826     if ((io = fd_to_io(fd)) == NULL) {
827         return ZX_ERR_BAD_HANDLE;
828     }
829     // TODO(ZX-973): implement/honor close-on-exec flag
830     if ((r = io->ops->clone(io, handles, types)) > 0) {
831         for (int i = 0; i < r; i++) {
832             types[i] |= (newfd << 16);
833         }
834     }
835     fdio_release(io);
836     return r;
837 }
838 
839 __EXPORT
fdio_transfer_fd(int fd,int newfd,zx_handle_t * handles,uint32_t * types)840 zx_status_t fdio_transfer_fd(int fd, int newfd, zx_handle_t* handles, uint32_t* types) {
841     fdio_t* io;
842     zx_status_t status;
843     if ((status = fdio_unbind_from_fd(fd, &io)) < 0) {
844         return status;
845     }
846     status = io->ops->unwrap(io, handles, types);
847     fdio_release(io);
848     if (status < 0) {
849         return status;
850     }
851     for (int n = 0; n < status; n++) {
852         types[n] |= (newfd << 16);
853     }
854     return status;
855 }
856 
857 __EXPORT
fdio_ioctl(int fd,int op,const void * in_buf,size_t in_len,void * out_buf,size_t out_len)858 ssize_t fdio_ioctl(int fd, int op, const void* in_buf, size_t in_len, void* out_buf, size_t out_len) {
859     fdio_t* io;
860     if ((io = fd_to_io(fd)) == NULL) {
861         return ZX_ERR_BAD_HANDLE;
862     }
863     ssize_t r = io->ops->ioctl(io, op, in_buf, in_len, out_buf, out_len);
864     fdio_release(io);
865     return r;
866 }
867 
// Waits on |io| for |events| until |deadline|. On ZX_OK or ZX_ERR_TIMED_OUT,
// the observed events are translated back and stored in |*out_pending| (if
// non-NULL). Returns ZX_ERR_INVALID_ARGS if the transport has no waitable
// handle for these events.
zx_status_t fdio_wait(fdio_t* io, uint32_t events, zx_time_t deadline,
                      uint32_t* out_pending) {
    zx_handle_t h = ZX_HANDLE_INVALID;
    zx_signals_t signals = 0;
    // Ask the transport which handle/signal set corresponds to |events|.
    io->ops->wait_begin(io, events, &h, &signals);
    if (h == ZX_HANDLE_INVALID)
        // Wait operation is not applicable to the handle.
        return ZX_ERR_INVALID_ARGS;

    zx_signals_t pending;
    zx_status_t status = zx_object_wait_one(h, signals, deadline, &pending);
    if (status == ZX_OK || status == ZX_ERR_TIMED_OUT) {
        // Translate the observed kernel signals back into fdio events.
        io->ops->wait_end(io, pending, &events);
        if (out_pending != NULL)
            *out_pending = events;
    }

    return status;
}
887 
888 __EXPORT
fdio_wait_fd(int fd,uint32_t events,uint32_t * _pending,zx_time_t deadline)889 zx_status_t fdio_wait_fd(int fd, uint32_t events, uint32_t* _pending, zx_time_t deadline) {
890     fdio_t* io = fd_to_io(fd);
891     if (io == NULL)
892         return ZX_ERR_BAD_HANDLE;
893 
894     zx_status_t status = fdio_wait(io, events, deadline, _pending);
895 
896     fdio_release(io);
897     return status;
898 }
899 
fdio_stat(fdio_t * io,struct stat * s)900 static zx_status_t fdio_stat(fdio_t* io, struct stat* s) {
901     fuchsia_io_NodeAttributes attr;
902     zx_status_t status = io->ops->get_attr(io, &attr);
903     if (status != ZX_OK) {
904         return status;
905     }
906 
907     memset(s, 0, sizeof(struct stat));
908     s->st_mode = attr.mode;
909     s->st_ino = attr.id;
910     s->st_size = attr.content_size;
911     s->st_blksize = VNATTR_BLKSIZE;
912     s->st_blocks = attr.storage_size / VNATTR_BLKSIZE;
913     s->st_nlink = attr.link_count;
914     s->st_ctim.tv_sec = attr.creation_time / ZX_SEC(1);
915     s->st_ctim.tv_nsec = attr.creation_time % ZX_SEC(1);
916     s->st_mtim.tv_sec = attr.modification_time / ZX_SEC(1);
917     s->st_mtim.tv_nsec = attr.modification_time % ZX_SEC(1);
918     return ZX_OK;
919 }
920 
921 // TODO(ZX-974): determine complete correct mapping
fdio_status_to_errno(zx_status_t status)922 int fdio_status_to_errno(zx_status_t status) {
923     switch (status) {
924     case ZX_ERR_NOT_FOUND: return ENOENT;
925     case ZX_ERR_NO_MEMORY: return ENOMEM;
926     case ZX_ERR_INVALID_ARGS: return EINVAL;
927     case ZX_ERR_BUFFER_TOO_SMALL: return EINVAL;
928     case ZX_ERR_TIMED_OUT: return ETIMEDOUT;
929     case ZX_ERR_UNAVAILABLE: return EBUSY;
930     case ZX_ERR_ALREADY_EXISTS: return EEXIST;
931     case ZX_ERR_PEER_CLOSED: return EPIPE;
932     case ZX_ERR_BAD_STATE: return EPIPE;
933     case ZX_ERR_BAD_PATH: return ENAMETOOLONG;
934     case ZX_ERR_IO: return EIO;
935     case ZX_ERR_NOT_FILE: return EISDIR;
936     case ZX_ERR_NOT_DIR: return ENOTDIR;
937     case ZX_ERR_NOT_SUPPORTED: return ENOTSUP;
938     case ZX_ERR_OUT_OF_RANGE: return EINVAL;
939     case ZX_ERR_NO_RESOURCES: return ENOMEM;
940     case ZX_ERR_BAD_HANDLE: return EBADF;
941     case ZX_ERR_ACCESS_DENIED: return EACCES;
942     case ZX_ERR_SHOULD_WAIT: return EAGAIN;
943     case ZX_ERR_FILE_BIG: return EFBIG;
944     case ZX_ERR_NO_SPACE: return ENOSPC;
945     case ZX_ERR_NOT_EMPTY: return ENOTEMPTY;
946     case ZX_ERR_IO_REFUSED: return ECONNREFUSED;
947     case ZX_ERR_IO_INVALID: return EIO;
948     case ZX_ERR_CANCELED: return EBADF;
949     case ZX_ERR_PROTOCOL_NOT_SUPPORTED: return EPROTONOSUPPORT;
950     case ZX_ERR_ADDRESS_UNREACHABLE: return ENETUNREACH;
951     case ZX_ERR_ADDRESS_IN_USE: return EADDRINUSE;
952     case ZX_ERR_NOT_CONNECTED: return ENOTCONN;
953     case ZX_ERR_CONNECTION_REFUSED: return ECONNREFUSED;
954     case ZX_ERR_CONNECTION_RESET: return ECONNRESET;
955     case ZX_ERR_CONNECTION_ABORTED: return ECONNABORTED;
956 
957     // No specific translation, so return a generic errno value.
958     default: return EIO;
959     }
960 }
961 
962 // The functions from here on provide implementations of fd and path
963 // centric posix-y io operations.
964 
965 __EXPORT
ssize_t readv(int fd, const struct iovec* iov, int num) {
    // Emulate readv() with sequential read() calls. A short read ends the
    // scatter early and returns the bytes gathered so far; an error is
    // returned only if nothing was read yet.
    ssize_t total = 0;
    for (int i = 0; i < num; i++, iov++) {
        if (iov->iov_len == 0) {
            continue;
        }
        ssize_t r = read(fd, iov->iov_base, iov->iov_len);
        if (r < 0) {
            return total ? total : r;
        }
        if ((size_t)r < iov->iov_len) {
            return total + r;
        }
        total += r;
    }
    return total;
}
985 
986 __EXPORT
ssize_t writev(int fd, const struct iovec* iov, int num) {
    // Emulate writev() with sequential write() calls. A short write ends the
    // gather early and returns the bytes emitted so far; an error is
    // returned only if nothing was written yet.
    ssize_t total = 0;
    for (int i = 0; i < num; i++, iov++) {
        if (iov->iov_len == 0) {
            continue;
        }
        ssize_t r = write(fd, iov->iov_base, iov->iov_len);
        if (r < 0) {
            return total ? total : r;
        }
        if ((size_t)r < iov->iov_len) {
            return total + r;
        }
        total += r;
    }
    return total;
}
1006 
1007 __EXPORT
_mmap_file(size_t offset,size_t len,zx_vm_option_t zx_options,int flags,int fd,off_t fd_off,uintptr_t * out)1008 zx_status_t _mmap_file(size_t offset, size_t len, zx_vm_option_t zx_options, int flags, int fd,
1009                        off_t fd_off, uintptr_t* out) {
1010     fdio_t* io;
1011     if ((io = fd_to_io(fd)) == NULL) {
1012         return ZX_ERR_BAD_HANDLE;
1013     }
1014 
1015     int vflags = zx_options | (flags & MAP_PRIVATE ? fuchsia_io_VMO_FLAG_PRIVATE : 0);
1016     zx_handle_t vmo;
1017     zx_status_t r = io->ops->get_vmo(io, vflags, &vmo);
1018     fdio_release(io);
1019     if (r < 0) {
1020         return r;
1021     }
1022 
1023     uintptr_t ptr = 0;
1024     r = zx_vmar_map(zx_vmar_root_self(), zx_options, offset, vmo, fd_off, len, &ptr);
1025     zx_handle_close(vmo);
1026     // TODO: map this as shared if we ever implement forking
1027     if (r < 0) {
1028         return r;
1029     }
1030 
1031     *out = ptr;
1032     return ZX_OK;
1033 }
1034 
1035 __EXPORT
unlinkat(int dirfd,const char * path,int flags)1036 int unlinkat(int dirfd, const char* path, int flags) {
1037     char name[NAME_MAX + 1];
1038     fdio_t* io;
1039     zx_status_t r;
1040     if ((r = __fdio_opendir_containing_at(&io, dirfd, path, name)) < 0) {
1041         return ERROR(r);
1042     }
1043     r = io->ops->unlink(io, name, strlen(name));
1044     io->ops->close(io);
1045     fdio_release(io);
1046     return STATUS(r);
1047 }
1048 
1049 __EXPORT
read(int fd,void * buf,size_t count)1050 ssize_t read(int fd, void* buf, size_t count) {
1051     if (buf == NULL && count > 0) {
1052         return ERRNO(EINVAL);
1053     }
1054 
1055     fdio_t* io = fd_to_io(fd);
1056     if (io == NULL) {
1057         return ERRNO(EBADF);
1058     }
1059     zx_status_t status;
1060     for (;;) {
1061         status = io->ops->read(io, buf, count);
1062         if (status != ZX_ERR_SHOULD_WAIT || io->ioflag & IOFLAG_NONBLOCK) {
1063             break;
1064         }
1065         fdio_wait_fd(fd, FDIO_EVT_READABLE | FDIO_EVT_PEER_CLOSED, NULL, ZX_TIME_INFINITE);
1066     }
1067     fdio_release(io);
1068     return status < 0 ? STATUS(status) : status;
1069 }
1070 
1071 __EXPORT
write(int fd,const void * buf,size_t count)1072 ssize_t write(int fd, const void* buf, size_t count) {
1073     if (buf == NULL && count > 0) {
1074         return ERRNO(EINVAL);
1075     }
1076 
1077     fdio_t* io = fd_to_io(fd);
1078     if (io == NULL) {
1079         return ERRNO(EBADF);
1080     }
1081     zx_status_t status;
1082     for (;;) {
1083         status = io->ops->write(io, buf, count);
1084         if ((status != ZX_ERR_SHOULD_WAIT) || (io->ioflag & IOFLAG_NONBLOCK)) {
1085             break;
1086         }
1087         fdio_wait_fd(fd, FDIO_EVT_WRITABLE | FDIO_EVT_PEER_CLOSED, NULL, ZX_TIME_INFINITE);
1088     }
1089     fdio_release(io);
1090     return status < 0 ? STATUS(status) : status;
1091 }
1092 
1093 __EXPORT
ssize_t preadv(int fd, const struct iovec* iov, int count, off_t ofs) {
    // Emulate preadv() with sequential pread() calls, advancing the file
    // offset locally. A short read ends the scatter early.
    ssize_t total = 0;
    for (int i = 0; i < count; i++, iov++) {
        if (iov->iov_len == 0) {
            continue;
        }
        ssize_t r = pread(fd, iov->iov_base, iov->iov_len, ofs);
        if (r < 0) {
            return total ? total : r;
        }
        if ((size_t)r < iov->iov_len) {
            return total + r;
        }
        total += r;
        ofs += r;
    }
    return total;
}
1114 
1115 __EXPORT
pread(int fd,void * buf,size_t size,off_t ofs)1116 ssize_t pread(int fd, void* buf, size_t size, off_t ofs) {
1117     if (buf == NULL && size > 0) {
1118         return ERRNO(EINVAL);
1119     }
1120 
1121     fdio_t* io = fd_to_io(fd);
1122     if (io == NULL) {
1123         return ERRNO(EBADF);
1124     }
1125     zx_status_t status;
1126     for (;;) {
1127         status = io->ops->read_at(io, buf, size, ofs);
1128         if ((status != ZX_ERR_SHOULD_WAIT) || (io->ioflag & IOFLAG_NONBLOCK)) {
1129             break;
1130         }
1131         fdio_wait_fd(fd, FDIO_EVT_READABLE | FDIO_EVT_PEER_CLOSED, NULL, ZX_TIME_INFINITE);
1132     }
1133     fdio_release(io);
1134     return status < 0 ? STATUS(status) : status;
1135 }
1136 
1137 __EXPORT
ssize_t pwritev(int fd, const struct iovec* iov, int count, off_t ofs) {
    // Emulate pwritev() with sequential pwrite() calls, advancing the file
    // offset locally. A short write ends the gather early.
    ssize_t total = 0;
    for (int i = 0; i < count; i++, iov++) {
        if (iov->iov_len == 0) {
            continue;
        }
        ssize_t r = pwrite(fd, iov->iov_base, iov->iov_len, ofs);
        if (r < 0) {
            return total ? total : r;
        }
        if ((size_t)r < iov->iov_len) {
            return total + r;
        }
        total += r;
        ofs += r;
    }
    return total;
}
1158 
1159 __EXPORT
pwrite(int fd,const void * buf,size_t size,off_t ofs)1160 ssize_t pwrite(int fd, const void* buf, size_t size, off_t ofs) {
1161     if (buf == NULL && size > 0) {
1162         return ERRNO(EINVAL);
1163     }
1164 
1165     fdio_t* io = fd_to_io(fd);
1166     if (io == NULL) {
1167         return ERRNO(EBADF);
1168     }
1169     zx_status_t status;
1170     for (;;) {
1171         status = io->ops->write_at(io, buf, size, ofs);
1172         if ((status != ZX_ERR_SHOULD_WAIT) || (io->ioflag & IOFLAG_NONBLOCK)) {
1173             break;
1174         }
1175         fdio_wait_fd(fd, FDIO_EVT_WRITABLE | FDIO_EVT_PEER_CLOSED, NULL, ZX_TIME_INFINITE);
1176     }
1177     fdio_release(io);
1178     return status < 0 ? STATUS(status) : status;
1179 }
1180 
1181 __EXPORT
close(int fd)1182 int close(int fd) {
1183     mtx_lock(&fdio_lock);
1184     if ((fd < 0) || (fd >= FDIO_MAX_FD) || (fdio_fdtab[fd] == NULL)) {
1185         mtx_unlock(&fdio_lock);
1186         return ERRNO(EBADF);
1187     }
1188     fdio_t* io = fdio_fdtab[fd];
1189     io->dupcount--;
1190     fdio_fdtab[fd] = NULL;
1191     LOG(1, "fdio: close(%d) dupcount=%u\n", io->dupcount);
1192     if (io->dupcount > 0) {
1193         // still alive in other fdtab slots
1194         mtx_unlock(&fdio_lock);
1195         fdio_release(io);
1196         return ZX_OK;
1197     } else {
1198         mtx_unlock(&fdio_lock);
1199         int r = io->ops->close(io);
1200         fdio_release(io);
1201         return STATUS(r);
1202     }
1203 }
1204 
// Shared implementation for dup()/dup2()/dup3()/F_DUPFD: bind the fdio
// object behind |oldfd| to |newfd|, or — when |newfd| is negative — to the
// first free slot at or above |starting_fd|. Returns the new fd, or -1
// with errno set.
static int fdio_dup(int oldfd, int newfd, int starting_fd) {
    fdio_t* io = fd_to_io(oldfd);
    if (io == NULL) {
        return ERRNO(EBADF);
    }
    int fd = fdio_bind_to_fd(io, newfd, starting_fd);
    if (fd < 0) {
        // Binding failed: drop the reference taken by fd_to_io().
        fdio_release(io);
    }
    return fd;
}
1216 
__EXPORT
// Duplicate |oldfd| onto the specific slot |newfd|.
int dup2(int oldfd, int newfd) {
    return fdio_dup(oldfd, newfd, 0);
}
1221 
__EXPORT
// Duplicate |oldfd| onto the lowest available file descriptor.
int dup(int oldfd) {
    return fdio_dup(oldfd, -1, 0);
}
1226 
__EXPORT
int dup3(int oldfd, int newfd, int flags) {
    // dup3 differs from dup2 in that it fails with EINVAL, rather
    // than being a no op, on being given the same fd for both old and
    // new.
    if (oldfd == newfd) {
        return ERRNO(EINVAL);
    }

    // O_CLOEXEC is the only flag dup3 accepts.
    if (flags != 0 && flags != O_CLOEXEC) {
        return ERRNO(EINVAL);
    }

    // TODO(ZX-973) Implement O_CLOEXEC.
    return fdio_dup(oldfd, newfd, 0);
}
1243 
// fcntl() emulation over fdio. Supported commands: F_DUPFD(_CLOEXEC),
// F_GETFD/F_SETFD (local fd flags only), F_GETFL/F_SETFL (remote io flags
// plus the locally-tracked O_NONBLOCK bit). Ownership and record-locking
// commands are unimplemented and return ENOSYS.
__EXPORT
int fcntl(int fd, int cmd, ...) {
// Note that it is not safe to pull out the int out of the
// variadic arguments at the top level, as callers are not
// required to pass anything for many of the commands.
#define GET_INT_ARG(ARG)         \
    va_list args;                \
    va_start(args, cmd);         \
    int ARG = va_arg(args, int); \
    va_end(args)

    switch (cmd) {
    case F_DUPFD:
    case F_DUPFD_CLOEXEC: {
        // TODO(ZX-973) Implement CLOEXEC.
        GET_INT_ARG(starting_fd);
        return fdio_dup(fd, -1, starting_fd);
    }
    case F_GETFD: {
        fdio_t* io = fd_to_io(fd);
        if (io == NULL) {
            return ERRNO(EBADF);
        }
        // fd flags are tracked locally in ioflag, never sent to the remote.
        int flags = (int)(io->ioflag & IOFLAG_FD_FLAGS);
        // POSIX mandates that the return value be nonnegative if successful.
        assert(flags >= 0);
        fdio_release(io);
        return flags;
    }
    case F_SETFD: {
        fdio_t* io = fd_to_io(fd);
        if (io == NULL) {
            return ERRNO(EBADF);
        }
        GET_INT_ARG(flags);
        // TODO(ZX-973) Implement CLOEXEC.
        io->ioflag &= ~IOFLAG_FD_FLAGS;
        io->ioflag |= (uint32_t)flags & IOFLAG_FD_FLAGS;
        fdio_release(io);
        return 0;
    }
    case F_GETFL: {
        fdio_t* io = fd_to_io(fd);
        if (io == NULL) {
            return ERRNO(EBADF);
        }
        uint32_t flags = 0;
        zx_status_t r = io->ops->get_flags(io, &flags);
        if (r == ZX_ERR_NOT_SUPPORTED) {
            // We treat this as non-fatal, as it's valid for a remote to
            // simply not support FCNTL, but we still want to correctly
            // report the state of the (local) NONBLOCK flag
            flags = 0;
            r = ZX_OK;
        }
        flags = zxio_flags_to_fdio(flags);
        // O_NONBLOCK lives in the local ioflag, merge it in.
        if (io->ioflag & IOFLAG_NONBLOCK) {
            flags |= O_NONBLOCK;
        }
        fdio_release(io);
        if (r < 0) {
            return STATUS(r);
        }
        return flags;
    }
    case F_SETFL: {
        fdio_t* io = fd_to_io(fd);
        if (io == NULL) {
            return ERRNO(EBADF);
        }
        GET_INT_ARG(n);

        zx_status_t r;
        // O_NONBLOCK is handled locally below, so strip it before sending
        // the remaining flags to the remote.
        uint32_t flags = fdio_flags_to_zxio(n & ~O_NONBLOCK);
        r = io->ops->set_flags(io, flags);

        // Some remotes don't support setting flags; we
        // can adjust their local flags anyway if NONBLOCK
        // is the only bit being toggled.
        if (r == ZX_ERR_NOT_SUPPORTED && ((n | O_NONBLOCK) == O_NONBLOCK)) {
            r = ZX_OK;
        }

        if (r != ZX_OK) {
            n = STATUS(r);
        } else {
            if (n & O_NONBLOCK) {
                io->ioflag |= IOFLAG_NONBLOCK;
            } else {
                io->ioflag &= ~IOFLAG_NONBLOCK;
            }
            n = 0;
        }
        fdio_release(io);
        return n;
    }
    case F_GETOWN:
    case F_SETOWN:
        // TODO(kulakowski) Socket support.
        return ERRNO(ENOSYS);
    case F_GETLK:
    case F_SETLK:
    case F_SETLKW:
        // TODO(kulakowski) Advisory file locking support.
        return ERRNO(ENOSYS);
    default:
        return ERRNO(EINVAL);
    }

#undef GET_INT_ARG
}
1355 
1356 __EXPORT
lseek(int fd,off_t offset,int whence)1357 off_t lseek(int fd, off_t offset, int whence) {
1358     fdio_t* io = fd_to_io(fd);
1359     if (io == NULL) {
1360         return ERRNO(EBADF);
1361     }
1362     off_t r = io->ops->seek(io, offset, whence);
1363     if (r == ZX_ERR_WRONG_TYPE) {
1364         // Although 'ESPIPE' is a bit of a misnomer, it is the valid errno
1365         // for any fd which does not implement seeking (i.e., for pipes,
1366         // sockets, etc).
1367         errno = ESPIPE;
1368         r = -1;
1369     } else if (r < 0) {
1370         r = ERROR(r);
1371     }
1372     fdio_release(io);
1373     return r;
1374 }
1375 
1376 #define READDIR_CMD_NONE  0
1377 #define READDIR_CMD_RESET 1
1378 
getdirents(int fd,void * ptr,size_t len,long cmd)1379 static int getdirents(int fd, void* ptr, size_t len, long cmd) {
1380     size_t actual;
1381     zx_status_t status;
1382     fdio_t* io = fd_to_io(fd);
1383     if (io == NULL) {
1384         return ERRNO(EBADF);
1385     }
1386     if (cmd == READDIR_CMD_RESET) {
1387         if ((status = io->ops->rewind(io)) != ZX_OK) {
1388             goto done;
1389         }
1390     }
1391     if ((status = io->ops->readdir(io, ptr, len, &actual)) != ZX_OK) {
1392         goto done;
1393     }
1394 
1395 done:
1396     fdio_release(io);
1397     return status == ZX_OK ? (int) actual : ERROR(status);
1398 }
1399 
truncateat(int dirfd,const char * path,off_t len)1400 static int truncateat(int dirfd, const char* path, off_t len) {
1401     fdio_t* io;
1402     zx_status_t r;
1403 
1404     if ((r = __fdio_open_at(&io, dirfd, path, O_WRONLY, 0)) < 0) {
1405         return ERROR(r);
1406     }
1407     r = io->ops->truncate(io, len);
1408     fdio_close(io);
1409     fdio_release(io);
1410     return STATUS(r);
1411 }
1412 
__EXPORT
// Truncate |path| (resolved against the current working directory) to |len|.
int truncate(const char* path, off_t len) {
    return truncateat(AT_FDCWD, path, len);
}
1417 
1418 __EXPORT
ftruncate(int fd,off_t len)1419 int ftruncate(int fd, off_t len) {
1420     fdio_t* io = fd_to_io(fd);
1421     if (io == NULL) {
1422         return ERRNO(EBADF);
1423     }
1424 
1425     zx_status_t r = io->ops->truncate(io, len);
1426     fdio_release(io);
1427     return STATUS(r);
1428 }
1429 
1430 // Filesystem operations (such as rename and link) which act on multiple paths
1431 // have some additional complexity on Zircon. These operations (eventually) act
1432 // on two pairs of variables: a source parent vnode + name, and a target parent
1433 // vnode + name. However, the loose coupling of these pairs can make their
1434 // correspondence difficult, especially when accessing each parent vnode may
1435 // involve crossing various filesystem boundaries.
1436 //
1437 // To resolve this problem, these kinds of operations involve:
1438 // - Opening the source parent vnode directly.
1439 // - Opening the target parent vnode directly, + acquiring a "vnode token".
1440 // - Sending the real operation + names to the source parent vnode, along with
1441 //   the "vnode token" representing the target parent vnode.
1442 //
1443 // Using zircon kernel primitives (cookies) to authenticate the vnode token, this
1444 // allows these multi-path operations to mix absolute / relative paths and cross
1445 // mount points with ease.
// Perform a two-path operation (rename or link): open both parent
// directories, obtain a "vnode token" for the target parent, then send the
// operation — both leaf names plus the token — to the source parent. See
// the comment block above for why the token indirection is needed.
static int two_path_op_at(uint32_t op, int olddirfd, const char* oldpath,
                          int newdirfd, const char* newpath) {
    char oldname[NAME_MAX + 1];
    fdio_t* io_oldparent;
    zx_status_t status = ZX_OK;
    if ((status = __fdio_opendir_containing_at(&io_oldparent, olddirfd, oldpath, oldname)) < 0) {
        return ERROR(status);
    }

    char newname[NAME_MAX + 1];
    fdio_t* io_newparent;
    if ((status = __fdio_opendir_containing_at(&io_newparent, newdirfd, newpath, newname)) < 0) {
        goto oldparent_open;
    }

    // Token ownership transfers to the rename/link call on success.
    zx_handle_t token;
    status = io_newparent->ops->get_token(io_newparent, &token);
    if (status < 0) {
        goto newparent_open;
    }

    if (op == fuchsia_io_DirectoryRenameOrdinal) {
        status = io_oldparent->ops->rename(io_oldparent, oldname,
                                           strlen(oldname), token, newname,
                                           strlen(newname));
    } else if (op == fuchsia_io_DirectoryLinkOrdinal) {
        status = io_oldparent->ops->link(io_oldparent, oldname, strlen(oldname),
                                         token, newname, strlen(newname));
    } else {
        // Unknown op: we still own the token, so close it here.
        zx_handle_close(token);
        status = ZX_ERR_NOT_SUPPORTED;
    }
newparent_open:
    io_newparent->ops->close(io_newparent);
    fdio_release(io_newparent);
oldparent_open:
    io_oldparent->ops->close(io_oldparent);
    fdio_release(io_oldparent);
    return STATUS(status);
}
1486 
__EXPORT
// Rename |oldpath| (relative to |olddirfd|) to |newpath| (relative to |newdirfd|).
int renameat(int olddirfd, const char* oldpath, int newdirfd, const char* newpath) {
    return two_path_op_at(fuchsia_io_DirectoryRenameOrdinal, olddirfd, oldpath, newdirfd, newpath);
}
1491 
__EXPORT
// Rename |oldpath| to |newpath|, both resolved against the current working directory.
int rename(const char* oldpath, const char* newpath) {
    return two_path_op_at(fuchsia_io_DirectoryRenameOrdinal, AT_FDCWD, oldpath, AT_FDCWD, newpath);
}
1496 
__EXPORT
// Create a hard link |newpath| referring to |oldpath|, both resolved
// against the current working directory.
int link(const char* oldpath, const char* newpath) {
    return two_path_op_at(fuchsia_io_DirectoryLinkOrdinal, AT_FDCWD, oldpath, AT_FDCWD, newpath);
}
1501 
__EXPORT
// Remove |path|, resolved against the current working directory.
int unlink(const char* path) {
    return unlinkat(AT_FDCWD, path, 0);
}
1506 
vopenat(int dirfd,const char * path,int flags,va_list args)1507 static int vopenat(int dirfd, const char* path, int flags, va_list args) {
1508     fdio_t* io = NULL;
1509     zx_status_t r;
1510     int fd;
1511     uint32_t mode = 0;
1512 
1513     if (flags & O_CREAT) {
1514         if (flags & O_DIRECTORY) {
1515             // The behavior of open with O_CREAT | O_DIRECTORY is underspecified
1516             // in POSIX. To help avoid programmer error, we explicitly disallow
1517             // the combination.
1518             return ERRNO(EINVAL);
1519         }
1520         mode = va_arg(args, uint32_t) & 0777;
1521     }
1522     if ((r = __fdio_open_at(&io, dirfd, path, flags, mode)) < 0) {
1523         return ERROR(r);
1524     }
1525     if (flags & O_NONBLOCK) {
1526         io->ioflag |= IOFLAG_NONBLOCK;
1527     }
1528     if ((fd = fdio_bind_to_fd(io, -1, 0)) < 0) {
1529         io->ops->close(io);
1530         fdio_release(io);
1531         return ERRNO(EMFILE);
1532     }
1533     return fd;
1534 }
1535 
__EXPORT
// POSIX open(): forwards to vopenat() with the cwd as the base directory.
// The optional third argument is the creation mode, read only for O_CREAT.
int open(const char* path, int flags, ...) {
    va_list ap;
    va_start(ap, flags);
    int ret = vopenat(AT_FDCWD, path, flags, ap);
    va_end(ap);
    return ret;
}
1544 
__EXPORT
// POSIX openat(): like open(), but resolves |path| relative to |dirfd|.
int openat(int dirfd, const char* path, int flags, ...) {
    va_list ap;
    va_start(ap, flags);
    int ret = vopenat(dirfd, path, flags, ap);
    va_end(ap);
    return ret;
}
1553 
__EXPORT
// Create directory |path|, resolved against the current working directory.
int mkdir(const char* path, mode_t mode) {
    return mkdirat(AT_FDCWD, path, mode);
}
1558 
1559 __EXPORT
mkdirat(int dirfd,const char * path,mode_t mode)1560 int mkdirat(int dirfd, const char* path, mode_t mode) {
1561     fdio_t* io = NULL;
1562     zx_status_t r;
1563 
1564     mode = (mode & 0777) | S_IFDIR;
1565 
1566     if ((r = __fdio_open_at(&io, dirfd, path, O_RDONLY | O_CREAT | O_EXCL, mode)) < 0) {
1567         return ERROR(r);
1568     }
1569     io->ops->close(io);
1570     fdio_release(io);
1571     return 0;
1572 }
1573 
1574 __EXPORT
fsync(int fd)1575 int fsync(int fd) {
1576     fdio_t* io = fd_to_io(fd);
1577     if (io == NULL) {
1578         return ERRNO(EBADF);
1579     }
1580     zx_status_t r = io->ops->sync(io);
1581     fdio_release(io);
1582     return STATUS(r);
1583 }
1584 
__EXPORT
int fdatasync(int fd) {
    // TODO(smklein): fdatasync does not need to flush metadata under certain
    // circumstances -- however, for now, this implementation will appear
    // functionally the same (if a little slower).
    return fsync(fd);
}
1592 
__EXPORT
int syncfs(int fd) {
    // TODO(smklein): Currently, fsync syncs the entire filesystem, not just
    // the target file descriptor. These functions should use different sync
    // mechanisms, where fsync is more fine-grained.
    return fsync(fd);
}
1600 
1601 __EXPORT
fstat(int fd,struct stat * s)1602 int fstat(int fd, struct stat* s) {
1603     fdio_t* io = fd_to_io(fd);
1604     if (io == NULL) {
1605         return ERRNO(EBADF);
1606     }
1607     int r = STATUS(fdio_stat(io, s));
1608     fdio_release(io);
1609     return r;
1610 }
1611 
1612 __EXPORT
fstatat(int dirfd,const char * fn,struct stat * s,int flags)1613 int fstatat(int dirfd, const char* fn, struct stat* s, int flags) {
1614     fdio_t* io;
1615     zx_status_t r;
1616 
1617     LOG(1,"fdio: fstatat(%d, '%s',...)\n", dirfd, fn);
1618     if ((r = __fdio_open_at(&io, dirfd, fn, O_PATH, 0)) < 0) {
1619         return ERROR(r);
1620     }
1621     LOG(1,"fdio: fstatat io=%p\n", io);
1622     r = fdio_stat(io, s);
1623     fdio_close(io);
1624     fdio_release(io);
1625     return STATUS(r);
1626 }
1627 
__EXPORT
// Stat |fn| resolved against the current working directory.
int stat(const char* fn, struct stat* s) {
    return fstatat(AT_FDCWD, fn, s, 0);
}
1632 
__EXPORT
// Fuchsia does not support symbolic links, so lstat behaves like stat.
int lstat(const char* path, struct stat* buf) {
    return stat(path, buf);
}
1637 
__EXPORT
// Resolve |filename| to a cleaned absolute path. Relative inputs are
// prefixed with the current working directory before cleaning. Returns
// |resolved| (or a strdup'd buffer when |resolved| is NULL), or NULL with
// errno set on failure.
char* realpath(const char* restrict filename, char* restrict resolved) {
    ssize_t r;
    struct stat st;
    char tmp[PATH_MAX];
    size_t outlen;
    bool is_dir;

    if (!filename) {
        errno = EINVAL;
        return NULL;
    }

    if (filename[0] != '/') {
        // Convert 'filename' from a relative path to an absolute path.
        size_t file_len = strlen(filename);
        mtx_lock(&fdio_cwd_lock);
        size_t cwd_len = strlen(fdio_cwd_path);
        if (cwd_len + 1 + file_len >= PATH_MAX) {
            mtx_unlock(&fdio_cwd_lock);
            errno = ENAMETOOLONG;
            return NULL;
        }
        char tmp2[PATH_MAX];
        memcpy(tmp2, fdio_cwd_path, cwd_len);
        mtx_unlock(&fdio_cwd_lock);
        tmp2[cwd_len] = '/';
        strcpy(tmp2 + cwd_len + 1, filename);
        zx_status_t status = __fdio_cleanpath(tmp2, tmp, &outlen, &is_dir);
        if (status != ZX_OK) {
            errno = EINVAL;
            return NULL;
        }
        // NOTE(review): unlike the absolute branch below, this branch never
        // stat()s the cleaned path, so a nonexistent relative path still
        // "resolves" — confirm whether this asymmetry is intentional.
    } else {
        // Clean the provided absolute path
        zx_status_t status = __fdio_cleanpath(filename, tmp, &outlen, &is_dir);
        if (status != ZX_OK) {
            errno = EINVAL;
            return NULL;
        }

        // Verify the target exists; stat() sets errno on failure.
        r = stat(tmp, &st);
        if (r < 0) {
            return NULL;
        }
    }
    return resolved ? strcpy(resolved, tmp) : strdup(tmp);
}
1686 
// Translate POSIX utimens-style |times| into a fuchsia.io SetAttr call.
// Only the modification time (times[1]) is honored: the access time in
// times[0] is ignored, and the |flags| parameter is currently unused.
// A NULL |times| or UTIME_NOW means "set mtime to the current UTC time";
// UTIME_OMIT leaves the attribute mask empty so nothing is updated.
static zx_status_t zx_utimens(fdio_t* io, const struct timespec times[2],
                              int flags) {
    fuchsia_io_NodeAttributes attr;
    memset(&attr, 0, sizeof(attr));
    uint32_t mask = 0;

    // Extract modify time.
    attr.modification_time = (times == NULL || times[1].tv_nsec == UTIME_NOW)
        ? zx_clock_get(ZX_CLOCK_UTC)
        : zx_time_add_duration(ZX_SEC(times[1].tv_sec), times[1].tv_nsec);

    if (times == NULL || times[1].tv_nsec != UTIME_OMIT) {
        // For setattr, tell which fields are valid.
        mask = fuchsia_io_NODE_ATTRIBUTE_FLAG_MODIFICATION_TIME;
    }

    // set time(s) on underlying object
    return io->ops->set_attr(io, mask, &attr);
}
1706 
1707 __EXPORT
utimensat(int dirfd,const char * fn,const struct timespec times[2],int flags)1708 int utimensat(int dirfd, const char *fn,
1709               const struct timespec times[2], int flags) {
1710     fdio_t* io;
1711     zx_status_t r;
1712 
1713     // TODO(orr): AT_SYMLINK_NOFOLLOW
1714     if ((flags & AT_SYMLINK_NOFOLLOW) != 0) {
1715         // Allow this flag - don't return an error.  Fuchsia does not support
1716         // symlinks, so don't break utilities (like tar) that use this flag.
1717     }
1718 
1719     if ((r = __fdio_open_at(&io, dirfd, fn, 0, 0)) < 0) {
1720         return ERROR(r);
1721     }
1722 
1723     r = zx_utimens(io, times, 0);
1724 
1725     fdio_close(io);
1726     fdio_release(io);
1727     return STATUS(r);
1728 }
1729 
1730 __EXPORT
futimens(int fd,const struct timespec times[2])1731 int futimens(int fd, const struct timespec times[2]) {
1732     fdio_t* io = fd_to_io(fd);
1733     zx_status_t r = zx_utimens(io, times, 0);
1734     fdio_release(io);
1735     return STATUS(r);
1736 }
1737 
1738 __EXPORT
pipe2(int pipefd[2],int flags)1739 int pipe2(int pipefd[2], int flags) {
1740     const int allowed_flags = O_NONBLOCK | O_CLOEXEC;
1741     if (flags & ~allowed_flags) {
1742         return ERRNO(EINVAL);
1743     }
1744     fdio_t *a, *b;
1745     int r = fdio_pipe_pair(&a, &b);
1746     if (r < 0) {
1747         return ERROR(r);
1748     }
1749     pipefd[0] = fdio_bind_to_fd(a, -1, 0);
1750     if (pipefd[0] < 0) {
1751         int errno_ = errno;
1752         fdio_close(a);
1753         fdio_release(a);
1754         fdio_close(b);
1755         fdio_release(b);
1756         return ERRNO(errno_);
1757     }
1758     pipefd[1] = fdio_bind_to_fd(b, -1, 0);
1759     if (pipefd[1] < 0) {
1760         int errno_ = errno;
1761         close(pipefd[0]);
1762         fdio_close(b);
1763         fdio_release(b);
1764         return ERRNO(errno_);
1765     }
1766     return 0;
1767 }
1768 
__EXPORT
// Classic pipe(): pipe2() with no flags.
int pipe(int pipefd[2]) {
    return pipe2(pipefd, 0);
}
1773 
__EXPORT
// Emulate an AF_UNIX SOCK_STREAM socketpair with a pipe. Other domains,
// types, and protocols are rejected. Validation order (type, then domain,
// then protocol) determines which errno wins when several are invalid.
int socketpair(int domain, int type, int protocol, int fd[2]) {
    if (type != SOCK_STREAM) {  // TODO(jamesr): SOCK_DGRAM
        errno = EPROTOTYPE;
        return -1;
    }
    if (domain != AF_UNIX) {
        errno = EAFNOSUPPORT;
        return -1;
    }
    if (protocol != 0) {
        errno = EPROTONOSUPPORT;
        return -1;
    }

    return pipe(fd);
}
1791 
1792 __EXPORT
faccessat(int dirfd,const char * filename,int amode,int flag)1793 int faccessat(int dirfd, const char* filename, int amode, int flag) {
1794     // For now, we just check to see if the file exists, until we
1795     // model permissions. But first, check that the flags and amode
1796     // are valid.
1797     const int allowed_flags = AT_EACCESS;
1798     if (flag & (~allowed_flags)) {
1799         return ERRNO(EINVAL);
1800     }
1801 
1802     // amode is allowed to be either a subset of this mask, or just F_OK.
1803     const int allowed_modes = R_OK | W_OK | X_OK;
1804     if (amode != F_OK && (amode & (~allowed_modes))) {
1805         return ERRNO(EINVAL);
1806     }
1807 
1808     // Since we are not tracking permissions yet, just check that the
1809     // file exists a la fstatat.
1810     fdio_t* io;
1811     zx_status_t status;
1812     if ((status = __fdio_open_at(&io, dirfd, filename, 0, 0)) < 0) {
1813         return ERROR(status);
1814     }
1815     struct stat s;
1816     status = fdio_stat(io, &s);
1817     fdio_close(io);
1818     fdio_release(io);
1819     return STATUS(status);
1820 }
1821 
1822 __EXPORT
getcwd(char * buf,size_t size)1823 char* getcwd(char* buf, size_t size) {
1824     char tmp[PATH_MAX];
1825     if (buf == NULL) {
1826         buf = tmp;
1827         size = PATH_MAX;
1828     } else if (size == 0) {
1829         errno = EINVAL;
1830         return NULL;
1831     }
1832 
1833     char* out = NULL;
1834     mtx_lock(&fdio_cwd_lock);
1835     size_t len = strlen(fdio_cwd_path) + 1;
1836     if (len < size) {
1837         memcpy(buf, fdio_cwd_path, len);
1838         out = buf;
1839     } else {
1840         errno = ERANGE;
1841     }
1842     mtx_unlock(&fdio_cwd_lock);
1843 
1844     if (out == tmp) {
1845         out = strdup(tmp);
1846     }
1847     return out;
1848 }
1849 
// Install |io| as the process-wide cwd handle and record |path| as the new
// cwd string. Takes ownership of |io|; the previous cwd handle is closed
// and released. Lock order (cwd lock, then fdtab lock) must be preserved.
void fdio_chdir(fdio_t* io, const char* path) {
    mtx_lock(&fdio_cwd_lock);
    update_cwd_path(path);
    mtx_lock(&fdio_lock);
    fdio_t* previous = fdio_cwd_handle;
    fdio_cwd_handle = io;
    previous->ops->close(previous);
    fdio_release(previous);
    mtx_unlock(&fdio_lock);
    mtx_unlock(&fdio_cwd_lock);
}
1861 
1862 __EXPORT
chdir(const char * path)1863 int chdir(const char* path) {
1864     fdio_t* io;
1865     zx_status_t r;
1866     if ((r = __fdio_open(&io, path, O_RDONLY | O_DIRECTORY, 0)) < 0) {
1867         return STATUS(r);
1868     }
1869     fdio_chdir(io, path);
1870     return 0;
1871 }
1872 
#define DIR_BUFSIZE 2048

// State backing a DIR* stream: a mutex-guarded cursor over a cached buffer
// of vdirent_t records fetched from |fd| via getdirents() (see readdir()).
struct __dirstream {
    mtx_t lock;
    int fd;
    // Total size of 'data' which has been filled with dirents
    size_t size;
    // Offset into 'data' of next ptr. NULL to reset the
    // directory lazily on the next call to getdirents
    uint8_t* ptr;
    // Internal cache of dirents
    uint8_t data[DIR_BUFSIZE];
    // Buffer returned to user
    struct dirent de;
};
1888 
internal_opendir(int fd)1889 static DIR* internal_opendir(int fd) {
1890     DIR* dir = calloc(1, sizeof(*dir));
1891     if (dir != NULL) {
1892         mtx_init(&dir->lock, mtx_plain);
1893         dir->size = 0;
1894         dir->fd = fd;
1895     }
1896     return dir;
1897 }
1898 
1899 __EXPORT
opendir(const char * name)1900 DIR* opendir(const char* name) {
1901     int fd = open(name, O_RDONLY | O_DIRECTORY);
1902     if (fd < 0)
1903         return NULL;
1904     DIR* dir = internal_opendir(fd);
1905     if (dir == NULL)
1906         close(fd);
1907     return dir;
1908 }
1909 
1910 __EXPORT
fdopendir(int fd)1911 DIR* fdopendir(int fd) {
1912     // Check the fd for validity, but we'll just store the fd
1913     // number so we don't save the fdio_t pointer.
1914     fdio_t* io = fd_to_io(fd);
1915     if (io == NULL) {
1916         errno = EBADF;
1917         return NULL;
1918     }
1919     // TODO(mcgrathr): Technically this should verify that it's
1920     // really a directory and fail with ENOTDIR if not.  But
1921     // that's not so easy to do, so don't bother for now.
1922     fdio_release(io);
1923     return internal_opendir(fd);
1924 }
1925 
1926 __EXPORT
closedir(DIR * dir)1927 int closedir(DIR* dir) {
1928     close(dir->fd);
1929     free(dir);
1930     return 0;
1931 }
1932 
1933 __EXPORT
// Return the next directory entry from |dir|, or NULL at end-of-stream.
//
// Entries are decoded from the DIR's cached buffer of vdirent_t records;
// when the cache is exhausted, the next batch is fetched via getdirents().
// The result points at per-DIR storage (dir->de), so it is only valid
// until the next readdir() on the same stream.
struct dirent* readdir(DIR* dir) {
    mtx_lock(&dir->lock);
    struct dirent* de = &dir->de;
    for (;;) {
        // At least a vdirent_t header remains in the cache: decode it.
        if (dir->size >= sizeof(vdirent_t)) {
            vdirent_t* vde = (void*)dir->ptr;

            if (dir->size < vde->size + sizeof(vdirent_t)) {
                // This buffer is corrupted (not large enough to hold a name).
                // Reset it.
                // NOTE(review): this break returns the previous contents of
                // dir->de (a non-NULL stale entry) rather than NULL —
                // confirm whether that is intended.
                dir->ptr = NULL;
                dir->size = 0;
                break;
            }

            // Advance the cursor past this record before filling in |de|.
            dir->ptr += vde->size + sizeof(vdirent_t);
            dir->size -= vde->size + sizeof(vdirent_t);

            if (vde->size == 0) {
                // Skip nameless entries.
                // (they may be generated by filtering filesystems)
                continue;
            }

            // The remaining portion of the buffer is large
            // enough to hold the dirent name.
            size_t namelen = vde->size;
            de->d_ino = vde->ino;
            de->d_off = 0;
            // The d_reclen field is nonstandard, but existing code
            // may expect it to be useful as an upper bound on the
            // length of the name.
            de->d_reclen = offsetof(struct dirent, d_name) + namelen + 1;
            de->d_type = vde->type;
            memcpy(de->d_name, vde->name, namelen);
            de->d_name[namelen] = '\0';
            break;
        }
        // Cache exhausted: refill. A NULL cursor (fresh stream or after
        // rewinddir) asks the server to reset its position first.
        int64_t cmd = (dir->ptr == NULL) ? READDIR_CMD_RESET : READDIR_CMD_NONE;
        int r = getdirents(dir->fd, dir->data, DIR_BUFSIZE, cmd);
        if (r > 0) {
            dir->ptr = dir->data;
            dir->size = r;
            continue;
        }
        // r <= 0: end of directory or error — report end-of-stream.
        de = NULL;
        break;
    }
    mtx_unlock(&dir->lock);
    return de;
}
1985 
1986 __EXPORT
rewinddir(DIR * dir)1987 void rewinddir(DIR* dir) {
1988     mtx_lock(&dir->lock);
1989     dir->size = 0;
1990     dir->ptr = NULL;
1991     mtx_unlock(&dir->lock);
1992 }
1993 
1994 __EXPORT
// Return the file descriptor backing the directory stream |dir|.
int dirfd(DIR* dir) {
    return dir->fd;
}
1998 
1999 __EXPORT
isatty(int fd)2000 int isatty(int fd) {
2001     fdio_t* io = fd_to_io(fd);
2002     if (io == NULL) {
2003         errno = EBADF;
2004         return 0;
2005     }
2006 
2007     int ret;
2008     // TODO(ZX-972)
2009     // For now, stdout etc. needs to be a tty for line buffering to
2010     // work. So let's pretend those are ttys but nothing else is.
2011     if (fd == 0 || fd == 1 || fd == 2) {
2012         ret = 1;
2013     } else {
2014         ret = 0;
2015         errno = ENOTTY;
2016     }
2017 
2018     fdio_release(io);
2019 
2020     return ret;
2021 }
2022 
2023 __EXPORT
// Set the process file-creation mask to |mask| (permission bits only) and
// return the previous mask.
mode_t umask(mode_t mask) {
    mtx_lock(&fdio_lock);
    mode_t previous = __fdio_global_state.umask;
    __fdio_global_state.umask = mask & 0777;
    mtx_unlock(&fdio_lock);
    return previous;
}
2032 
2033 __EXPORT
// Wrap a raw handle in a waitable fdio object and bind it to a new fd.
// Returns the fd, or a negative value on failure (the waitable is torn
// down in that case).
int fdio_handle_fd(zx_handle_t h, zx_signals_t signals_in, zx_signals_t signals_out,
                   bool shared_handle) {
    fdio_t* io = fdio_waitable_create(h, signals_in, signals_out, shared_handle);
    int fd = fdio_bind_to_fd(io, -1, 0);
    if (fd < 0) {
        // Binding failed: drop the waitable we just created.
        fdio_close(io);
        fdio_release(io);
    }
    return fd;
}
2044 
2045 // from fdio/unsafe.h, to support message-loop integration
2046 
2047 __EXPORT
// Begin an external wait on |io|: translate the POLL* bits in |events|
// into the handle and signal mask the caller should wait on.
void fdio_unsafe_wait_begin(fdio_t* io, uint32_t events,
                       zx_handle_t* handle_out, zx_signals_t* signals_out) {
    // Don't 'return' a void expression: C11 6.8.6.4 forbids a return with
    // an expression in a function returning void.
    io->ops->wait_begin(io, events, handle_out, signals_out);
}
2052 
2053 __EXPORT
// Finish an external wait on |io|: translate the observed |signals| back
// into POLL* bits in |*events_out|.
void fdio_unsafe_wait_end(fdio_t* io, zx_signals_t signals, uint32_t* events_out) {
    // Plain call, not 'return <void expr>;' (C11 6.8.6.4).
    io->ops->wait_end(io, signals, events_out);
}
2057 
2058 __EXPORT
// Drop the reference on |io| obtained via fdio_unsafe_fd_to_io()
// (message-loop integration helper; see fdio/unsafe.h).
void fdio_unsafe_release(fdio_t* io) {
    fdio_release(io);
}
2062 
2063 // TODO: getrlimit(RLIMIT_NOFILE, ...)
2064 #define MAX_POLL_NFDS 1024
2065 
2066 __EXPORT
// poll() with a timespec timeout, built on zx_object_wait_many().
//
// Each pollable fd is translated (wait_begin) into a handle+signal pair;
// after waiting, pending signals are translated back (wait_end) into
// revents. |sigmask| is not supported (ENOSYS). Returns the number of fds
// with nonzero revents, or -1 with errno set.
int ppoll(struct pollfd* fds, nfds_t n,
          const struct timespec* timeout_ts, const sigset_t* sigmask) {
    if (sigmask) {
        return ERRNO(ENOSYS);
    }
    if (n > MAX_POLL_NFDS) {
        return ERRNO(EINVAL);
    }

    // ios[i] parallels fds[i]; items[] is densely packed with only the
    // valid entries, so items index j and fds index i advance separately.
    fdio_t* ios[n];
    int ios_used_max = -1;

    zx_status_t r = ZX_OK;
    nfds_t nvalid = 0;

    zx_wait_item_t items[n];

    for (nfds_t i = 0; i < n; i++) {
        struct pollfd* pfd = &fds[i];
        pfd->revents = 0; // initialize to zero

        ios[i] = NULL;
        if (pfd->fd < 0) {
            // if fd is negative, the entry is invalid
            continue;
        }
        fdio_t* io;
        if ((io = fd_to_io(pfd->fd)) == NULL) {
            // fd is not opened
            pfd->revents = POLLNVAL;
            continue;
        }
        ios[i] = io;
        ios_used_max = i;

        zx_handle_t h;
        zx_signals_t sigs;
        io->ops->wait_begin(io, pfd->events, &h, &sigs);
        if (h == ZX_HANDLE_INVALID) {
            // wait operation is not applicable to the handle
            r = ZX_ERR_INVALID_ARGS;
            break;
        }
        items[nvalid].handle = h;
        items[nvalid].waitfor = sigs;
        items[nvalid].pending = 0;
        nvalid++;
    }

    int nfds = 0;
    if (r == ZX_OK && nvalid > 0) {
        zx_time_t tmo = ZX_TIME_INFINITE;
        // Check for overflows on every operation.
        // NOTE(review): if the conversion would overflow, tmo silently
        // stays infinite rather than failing — confirm this is intended.
        if (timeout_ts && timeout_ts->tv_sec >= 0 && timeout_ts->tv_nsec >= 0 &&
            timeout_ts->tv_sec <= INT64_MAX / ZX_SEC(1)) {
            zx_duration_t seconds_duration = ZX_SEC(timeout_ts->tv_sec);
            zx_duration_t duration =
                zx_duration_add_duration(seconds_duration, timeout_ts->tv_nsec);
            if (duration >= seconds_duration) {
                tmo = zx_deadline_after(duration);
            }
        }
        r = zx_object_wait_many(items, nvalid, tmo);
        // pending signals could be reported on ZX_ERR_TIMED_OUT case as well
        if (r == ZX_OK || r == ZX_ERR_TIMED_OUT) {
            nfds_t j = 0; // j counts up on a valid entry

            for (nfds_t i = 0; i < n; i++) {
                struct pollfd* pfd = &fds[i];
                fdio_t* io = ios[i];

                if (io == NULL) {
                    // skip an invalid entry
                    continue;
                }
                if (j < nvalid) {
                    uint32_t events = 0;
                    io->ops->wait_end(io, items[j].pending, &events);
                    // mask unrequested events except HUP/ERR
                    pfd->revents = events & (pfd->events | POLLHUP | POLLERR);
                    if (pfd->revents != 0) {
                        nfds++;
                    }
                }
                j++;
            }
        }
    }

    // Drop the references taken by fd_to_io() above.
    for (int i = 0; i <= ios_used_max; i++) {
        if (ios[i]) {
            fdio_release(ios[i]);
        }
    }

    return (r == ZX_OK || r == ZX_ERR_TIMED_OUT) ? nfds : ERROR(r);
}
2164 
2165 __EXPORT
// poll() in terms of ppoll(). A negative |timeout| (in milliseconds)
// means wait indefinitely, expressed as a NULL timespec.
int poll(struct pollfd* fds, nfds_t n, int timeout) {
    struct timespec ts = {
        .tv_sec = timeout / 1000,
        .tv_nsec = (timeout % 1000) * 1000000,
    };
    return ppoll(fds, n, timeout >= 0 ? &ts : NULL, NULL);
}
2171 
2172 __EXPORT
// select() built on zx_object_wait_many(), mirroring ppoll() above.
//
// Each fd set membership is translated into POLL* events, then into a
// handle+signal pair via wait_begin; after the wait, wait_end maps pending
// signals back and the fd sets are pruned to only the ready descriptors.
// Returns the number of ready set bits, or -1 with errno set.
int select(int n, fd_set* restrict rfds, fd_set* restrict wfds, fd_set* restrict efds,
           struct timeval* restrict tv) {
    if (n > FD_SETSIZE || n < 1) {
        return ERRNO(EINVAL);
    }

    // ios[fd] parallels the fd numbers; items[] is densely packed with the
    // valid entries, so item index j advances separately from fd.
    fdio_t* ios[n];
    int ios_used_max = -1;

    zx_status_t r = ZX_OK;
    int nvalid = 0;

    zx_wait_item_t items[n];

    for (int fd = 0; fd < n; fd++) {
        ios[fd] = NULL;

        uint32_t events = 0;
        if (rfds && FD_ISSET(fd, rfds))
            events |= POLLIN;
        if (wfds && FD_ISSET(fd, wfds))
            events |= POLLOUT;
        if (efds && FD_ISSET(fd, efds))
            events |= POLLERR;
        if (events == 0) {
            continue;
        }

        fdio_t* io;
        if ((io = fd_to_io(fd)) == NULL) {
            r = ZX_ERR_BAD_HANDLE;
            break;
        }
        ios[fd] = io;
        ios_used_max = fd;

        zx_handle_t h;
        zx_signals_t sigs;
        io->ops->wait_begin(io, events, &h, &sigs);
        if (h == ZX_HANDLE_INVALID) {
            r = ZX_ERR_INVALID_ARGS;
            break;
        }
        items[nvalid].handle = h;
        items[nvalid].waitfor = sigs;
        items[nvalid].pending = 0;
        nvalid++;
    }

    int nfds = 0;
    if (r == ZX_OK && nvalid > 0) {
        // NOTE(review): unlike ppoll() above, this conversion does no
        // overflow checking on tv_sec/tv_usec — confirm acceptable.
        zx_time_t tmo = (tv == NULL) ? ZX_TIME_INFINITE :
            zx_deadline_after(zx_duration_add_duration(ZX_SEC(tv->tv_sec), ZX_USEC(tv->tv_usec)));
        r = zx_object_wait_many(items, nvalid, tmo);
        // pending signals could be reported on ZX_ERR_TIMED_OUT case as well
        if (r == ZX_OK || r == ZX_ERR_TIMED_OUT) {
            int j = 0; // j counts up on a valid entry

            for (int fd = 0; fd < n; fd++) {
                fdio_t* io = ios[fd];
                if (io == NULL) {
                    // skip an invalid entry
                    continue;
                }
                if (j < nvalid) {
                    uint32_t events = 0;
                    io->ops->wait_end(io, items[j].pending, &events);
                    // Keep the fd in each set only if its event fired.
                    if (rfds && FD_ISSET(fd, rfds)) {
                        if (events & POLLIN) {
                            nfds++;
                        } else {
                            FD_CLR(fd, rfds);
                        }
                    }
                    if (wfds && FD_ISSET(fd, wfds)) {
                        if (events & POLLOUT) {
                            nfds++;
                        } else {
                            FD_CLR(fd, wfds);
                        }
                    }
                    if (efds && FD_ISSET(fd, efds)) {
                        if (events & POLLERR) {
                            nfds++;
                        } else {
                            FD_CLR(fd, efds);
                        }
                    }
                } else {
                    // This fd never made it into items[] (the setup loop
                    // broke early): report it as not ready.
                    if (rfds) {
                        FD_CLR(fd, rfds);
                    }
                    if (wfds) {
                        FD_CLR(fd, wfds);
                    }
                    if (efds) {
                        FD_CLR(fd, efds);
                    }
                }
                j++;
            }
        }
    }

    // Drop the references taken by fd_to_io() above.
    for (int i = 0; i <= ios_used_max; i++) {
        if (ios[i]) {
            fdio_release(ios[i]);
        }
    }

    return (r == ZX_OK || r == ZX_ERR_TIMED_OUT) ? nfds : ERROR(r);
}
2285 
2286 __EXPORT
ioctl(int fd,int req,...)2287 int ioctl(int fd, int req, ...) {
2288     fdio_t* io;
2289     if ((io = fd_to_io(fd)) == NULL) {
2290         return ERRNO(EBADF);
2291     }
2292     va_list ap;
2293     va_start(ap, req);
2294     ssize_t r = io->ops->posix_ioctl(io, req, ap);
2295     va_end(ap);
2296     fdio_release(io);
2297     return STATUS(r);
2298 }
2299 
2300 __EXPORT
sendto(int fd,const void * buf,size_t buflen,int flags,const struct sockaddr * addr,socklen_t addrlen)2301 ssize_t sendto(int fd, const void* buf, size_t buflen, int flags, const struct sockaddr* addr, socklen_t addrlen) {
2302     fdio_t* io = fd_to_io(fd);
2303     if (io == NULL) {
2304         return ERRNO(EBADF);
2305     }
2306     ssize_t r = io->ops->sendto(io, buf, buflen, flags, addr, addrlen);
2307     fdio_release(io);
2308     return r < 0 ? STATUS(r) : r;
2309 }
2310 
2311 __EXPORT
recvfrom(int fd,void * restrict buf,size_t buflen,int flags,struct sockaddr * restrict addr,socklen_t * restrict addrlen)2312 ssize_t recvfrom(int fd, void* restrict buf, size_t buflen, int flags, struct sockaddr* restrict addr, socklen_t* restrict addrlen) {
2313     fdio_t* io = fd_to_io(fd);
2314     if (io == NULL) {
2315         return ERRNO(EBADF);
2316     }
2317     if (addr != NULL && addrlen == NULL) {
2318         return ERRNO(EFAULT);
2319     }
2320     ssize_t r = io->ops->recvfrom(io, buf, buflen, flags, addr, addrlen);
2321     fdio_release(io);
2322     return r < 0 ? STATUS(r) : r;
2323 }
2324 
2325 __EXPORT
sendmsg(int fd,const struct msghdr * msg,int flags)2326 ssize_t sendmsg(int fd, const struct msghdr *msg, int flags) {
2327     fdio_t* io = fd_to_io(fd);
2328     if (io == NULL) {
2329         return ERRNO(EBADF);
2330     }
2331     ssize_t r = io->ops->sendmsg(io, msg, flags);
2332     fdio_release(io);
2333     return r < 0 ? STATUS(r) : r;
2334 }
2335 
2336 __EXPORT
recvmsg(int fd,struct msghdr * msg,int flags)2337 ssize_t recvmsg(int fd, struct msghdr* msg, int flags) {
2338     fdio_t* io = fd_to_io(fd);
2339     if (io == NULL) {
2340         return ERRNO(EBADF);
2341     }
2342     ssize_t r = io->ops->recvmsg(io, msg, flags);
2343     fdio_release(io);
2344     return r < 0 ? STATUS(r) : r;
2345 }
2346 
2347 __EXPORT
shutdown(int fd,int how)2348 int shutdown(int fd, int how) {
2349     fdio_t* io;
2350     if ((io = fd_to_io(fd)) == NULL) {
2351         return ERRNO(EBADF);
2352     }
2353     zx_status_t r = io->ops->shutdown(io, how);
2354     fdio_release(io);
2355     if (r == ZX_ERR_BAD_STATE) {
2356         return ERRNO(ENOTCONN);
2357     }
2358     if (r == ZX_ERR_WRONG_TYPE) {
2359         return ERRNO(ENOTSOCK);
2360     }
2361     return STATUS(r);
2362 }
2363 
2364 __EXPORT
fstatfs(int fd,struct statfs * buf)2365 int fstatfs(int fd, struct statfs* buf) {
2366     fdio_t* io;
2367     if ((io = fd_to_io(fd)) == NULL) {
2368         return ERRNO(EBADF);
2369     }
2370     zx_handle_t handle = fdio_unsafe_borrow_channel(io);
2371     if (handle == ZX_HANDLE_INVALID) {
2372         fdio_release(io);
2373         return ERRNO(ENOTSUP);
2374     }
2375     zx_status_t status;
2376     fuchsia_io_FilesystemInfo info;
2377     zx_status_t io_status = fuchsia_io_DirectoryAdminQueryFilesystem(handle, &status, &info);
2378     fdio_release(io);
2379     if (io_status != ZX_OK) {
2380         return ERRNO(fdio_status_to_errno(io_status));
2381     } else if (status != ZX_OK) {
2382         return ERRNO(fdio_status_to_errno(status));
2383     }
2384 
2385     info.name[fuchsia_io_MAX_FS_NAME_BUFFER - 1] = '\0';
2386 
2387     struct statfs stats = {};
2388 
2389     if (info.block_size) {
2390         stats.f_bsize = info.block_size;
2391         stats.f_blocks = info.total_bytes / stats.f_bsize;
2392         stats.f_bfree = stats.f_blocks - info.used_bytes / stats.f_bsize;
2393     }
2394     stats.f_bavail = stats.f_bfree;
2395     stats.f_files = info.total_nodes;
2396     stats.f_ffree = info.total_nodes - info.used_nodes;
2397     stats.f_namelen = info.max_filename_size;
2398     stats.f_type = info.fs_type;
2399     stats.f_fsid.__val[0] = info.fs_id;
2400     stats.f_fsid.__val[1] = info.fs_id >> 32;
2401 
2402     *buf = stats;
2403     return 0;
2404 }
2405 
2406 __EXPORT
statfs(const char * path,struct statfs * buf)2407 int statfs(const char* path, struct statfs* buf) {
2408     int fd = open(path, O_RDONLY | O_CLOEXEC);
2409     if (fd < 0) {
2410         return fd;
2411     }
2412     int rv = fstatfs(fd, buf);
2413     close(fd);
2414     return rv;
2415 }
2416 
2417 __EXPORT
// Return the maximum number of file descriptors this process can have
// open simultaneously (the size of the fdio fd table).
int _fd_open_max(void) {
    return FDIO_MAX_FD;
}
2421