/*
 * Copyright (c) 2018 Linaro Limited
 * Copyright (c) 2024 Tenstorrent AI ULC
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief File descriptor table
 *
 * This file provides a generic file descriptor table implementation, suitable
 * for any I/O object implementing POSIX I/O semantics (i.e. read/write +
 * aux operations).
 */
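
/*
 * Illustrative sketch (hypothetical my_* names, not part of this file): a
 * backend provides a struct fd_op_vtable with handlers such as
 *   ssize_t my_read(void *obj, void *buf, size_t count);
 *   ssize_t my_write(void *obj, const void *buf, size_t count);
 *   int my_ioctl(void *obj, unsigned int request, va_list args);
 * and binds an I/O object to a descriptor with:
 *
 *   static const struct fd_op_vtable my_fd_op_vtable = {
 *           .read = my_read,
 *           .write = my_write,
 *           .ioctl = my_ioctl,
 *   };
 *
 *   int fd = zvfs_alloc_fd(&my_obj, &my_fd_op_vtable);
 */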

#include <errno.h>
#include <string.h>
#include <stdio.h>

#include <zephyr/kernel.h>
#include <zephyr/sys/fdtable.h>
#include <zephyr/sys/speculation.h>
#include <zephyr/internal/syscall_handler.h>
#include <zephyr/sys/atomic.h>

struct stat;

struct fd_entry {
	void *obj;
	const struct fd_op_vtable *vtable;
	atomic_t refcount;
	struct k_mutex lock;
	struct k_condvar cond;
	size_t offset;
	uint32_t mode;
};

#if defined(CONFIG_POSIX_DEVICE_IO)
static const struct fd_op_vtable stdinout_fd_op_vtable;

BUILD_ASSERT(CONFIG_ZVFS_OPEN_MAX >= 3, "CONFIG_ZVFS_OPEN_MAX >= 3 for CONFIG_POSIX_DEVICE_IO");
#endif /* defined(CONFIG_POSIX_DEVICE_IO) */

static struct fd_entry fdtable[CONFIG_ZVFS_OPEN_MAX] = {
#if defined(CONFIG_POSIX_DEVICE_IO)
	/*
	 * Predefine entries for stdin/stdout/stderr.
	 */
	{
		/* STDIN */
		.vtable = &stdinout_fd_op_vtable,
		.refcount = ATOMIC_INIT(1),
		.lock = Z_MUTEX_INITIALIZER(fdtable[0].lock),
		.cond = Z_CONDVAR_INITIALIZER(fdtable[0].cond),
	},
	{
		/* STDOUT */
		.vtable = &stdinout_fd_op_vtable,
		.refcount = ATOMIC_INIT(1),
		.lock = Z_MUTEX_INITIALIZER(fdtable[1].lock),
		.cond = Z_CONDVAR_INITIALIZER(fdtable[1].cond),
	},
	{
		/* STDERR */
		.vtable = &stdinout_fd_op_vtable,
		.refcount = ATOMIC_INIT(1),
		.lock = Z_MUTEX_INITIALIZER(fdtable[2].lock),
		.cond = Z_CONDVAR_INITIALIZER(fdtable[2].cond),
	},
#else
	{0},
#endif
};

static K_MUTEX_DEFINE(fdtable_lock);

static int z_fd_ref(int fd)
{
	return atomic_inc(&fdtable[fd].refcount) + 1;
}

static int z_fd_unref(int fd)
{
	atomic_val_t old_rc;

	/* The reference count must be checked to avoid decrementing it below
	 * zero, which would cause a file descriptor leak. The loop below
	 * performs an atomic decrement only if the refcount is greater than
	 * zero; otherwise the refcount is left unmodified.
	 */
	do {
		old_rc = atomic_get(&fdtable[fd].refcount);
		if (!old_rc) {
			return 0;
		}
	} while (!atomic_cas(&fdtable[fd].refcount, old_rc, old_rc - 1));

	if (old_rc != 1) {
		return old_rc - 1;
	}

	fdtable[fd].obj = NULL;
	fdtable[fd].vtable = NULL;

	return 0;
}

static int _find_fd_entry(void)
{
	int fd;

	for (fd = 0; fd < ARRAY_SIZE(fdtable); fd++) {
		if (!atomic_get(&fdtable[fd].refcount)) {
			return fd;
		}
	}

	errno = ENFILE;
	return -1;
}

static int _check_fd(int fd)
{
	if ((fd < 0) || (fd >= ARRAY_SIZE(fdtable))) {
		errno = EBADF;
		return -1;
	}

	fd = k_array_index_sanitize(fd, ARRAY_SIZE(fdtable));

	if (!atomic_get(&fdtable[fd].refcount)) {
		errno = EBADF;
		return -1;
	}

	return 0;
}

#ifdef CONFIG_ZTEST
bool fdtable_fd_is_initialized(int fd)
{
	struct k_mutex ref_lock;
	struct k_condvar ref_cond;

	if (fd < 0 || fd >= ARRAY_SIZE(fdtable)) {
		return false;
	}

	ref_lock = (struct k_mutex)Z_MUTEX_INITIALIZER(fdtable[fd].lock);
	if (memcmp(&ref_lock, &fdtable[fd].lock, sizeof(ref_lock)) != 0) {
		return false;
	}

	ref_cond = (struct k_condvar)Z_CONDVAR_INITIALIZER(fdtable[fd].cond);
	if (memcmp(&ref_cond, &fdtable[fd].cond, sizeof(ref_cond)) != 0) {
		return false;
	}

	return true;
}
#endif /* CONFIG_ZTEST */

void *zvfs_get_fd_obj(int fd, const struct fd_op_vtable *vtable, int err)
{
	struct fd_entry *entry;

	if (_check_fd(fd) < 0) {
		return NULL;
	}

	entry = &fdtable[fd];

	if ((vtable != NULL) && (entry->vtable != vtable)) {
		errno = err;
		return NULL;
	}

	return entry->obj;
}

static int z_get_fd_by_obj_and_vtable(void *obj, const struct fd_op_vtable *vtable)
{
	int fd;

	for (fd = 0; fd < ARRAY_SIZE(fdtable); fd++) {
		if (fdtable[fd].obj == obj && fdtable[fd].vtable == vtable) {
			return fd;
		}
	}

	errno = ENFILE;
	return -1;
}

bool zvfs_get_obj_lock_and_cond(void *obj, const struct fd_op_vtable *vtable, struct k_mutex **lock,
			     struct k_condvar **cond)
{
	int fd;
	struct fd_entry *entry;

	fd = z_get_fd_by_obj_and_vtable(obj, vtable);
	if (_check_fd(fd) < 0) {
		return false;
	}

	entry = &fdtable[fd];

	if (lock) {
		*lock = &entry->lock;
	}

	if (cond) {
		*cond = &entry->cond;
	}

	return true;
}

void *zvfs_get_fd_obj_and_vtable(int fd, const struct fd_op_vtable **vtable,
			      struct k_mutex **lock)
{
	struct fd_entry *entry;

	if (_check_fd(fd) < 0) {
		return NULL;
	}

	entry = &fdtable[fd];
	*vtable = entry->vtable;

	if (lock != NULL) {
		*lock = &entry->lock;
	}

	return entry->obj;
}

int zvfs_reserve_fd(void)
{
	int fd;

	(void)k_mutex_lock(&fdtable_lock, K_FOREVER);

	fd = _find_fd_entry();
	if (fd >= 0) {
		/* Mark entry as used, zvfs_finalize_fd() will fill it in. */
		(void)z_fd_ref(fd);
		fdtable[fd].obj = NULL;
		fdtable[fd].vtable = NULL;
		fdtable[fd].offset = 0;
		k_mutex_init(&fdtable[fd].lock);
		k_condvar_init(&fdtable[fd].cond);
	}

	k_mutex_unlock(&fdtable_lock);

	return fd;
}
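
/*
 * Usage sketch (hypothetical my_* names, not part of this file): reserving a
 * descriptor first lets a backend fail early on table exhaustion (errno is
 * set to ENFILE by _find_fd_entry()) before committing other resources, then
 * publish the object via zvfs_finalize_fd():
 *
 *   int fd = zvfs_reserve_fd();
 *
 *   if (fd < 0) {
 *           return -1;
 *   }
 *
 *   struct my_obj *obj = my_obj_create();
 *
 *   if (obj == NULL) {
 *           zvfs_free_fd(fd);
 *           return -1;
 *   }
 *
 *   zvfs_finalize_fd(fd, obj, &my_fd_op_vtable);
 *   return fd;
 */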

void zvfs_finalize_typed_fd(int fd, void *obj, const struct fd_op_vtable *vtable, uint32_t mode)
{
	/* Assumes fd was already bounds-checked. */
#ifdef CONFIG_USERSPACE
	/* Descriptor context objects are inserted into the table when they
	 * are ready for use. Mark the object as initialized and grant the
	 * caller (and only the caller) access.
	 *
	 * This call is a no-op if obj is invalid or does not point to a
	 * kernel object.
	 */
	k_object_recycle(obj);
#endif
	fdtable[fd].obj = obj;
	fdtable[fd].vtable = vtable;
	fdtable[fd].mode = mode;

	/* Let the object know about the lock just in case it needs it
	 * for something. For BSD sockets, the lock is used with condition
	 * variables to avoid keeping the lock for a long period of time.
	 */
	if (vtable && vtable->ioctl) {
		int prev_errno = errno;

		(void)zvfs_fdtable_call_ioctl(vtable, obj, ZFD_IOCTL_SET_LOCK,
					   &fdtable[fd].lock);
		if ((prev_errno != EOPNOTSUPP) && (errno == EOPNOTSUPP)) {
			/* Restore the backed-up errno value if the backend does not
			 * support locking.
			 */
			errno = prev_errno;
		}
	}
}
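
/*
 * Sketch of the backend side of ZFD_IOCTL_SET_LOCK (hypothetical my_* names,
 * not part of this file): a backend that wants the per-fd lock stores the
 * pointer passed through its ioctl vmethod; one that does not simply sets
 * errno to EOPNOTSUPP, which zvfs_finalize_typed_fd() tolerates above.
 *
 *   static int my_ioctl(void *obj, unsigned int request, va_list args)
 *   {
 *           struct my_obj *ctx = obj;
 *
 *           switch (request) {
 *           case ZFD_IOCTL_SET_LOCK:
 *                   ctx->lock = va_arg(args, struct k_mutex *);
 *                   return 0;
 *           default:
 *                   errno = EOPNOTSUPP;
 *                   return -1;
 *           }
 *   }
 */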

void zvfs_free_fd(int fd)
{
	/* Assumes fd was already bounds-checked. */
	(void)z_fd_unref(fd);
}

int zvfs_alloc_fd(void *obj, const struct fd_op_vtable *vtable)
{
	int fd;

	fd = zvfs_reserve_fd();
	if (fd >= 0) {
		zvfs_finalize_fd(fd, obj, vtable);
	}

	return fd;
}

static bool supports_pread_pwrite(uint32_t mode)
{
	switch (mode & ZVFS_MODE_IFMT) {
	case ZVFS_MODE_IFSHM:
		return true;
	default:
		return false;
	}
}

static ssize_t zvfs_rw(int fd, void *buf, size_t sz, bool is_write, const size_t *from_offset)
{
	bool prw;
	ssize_t res;
	const size_t *off;

	if (_check_fd(fd) < 0) {
		return -1;
	}

	(void)k_mutex_lock(&fdtable[fd].lock, K_FOREVER);

	prw = supports_pread_pwrite(fdtable[fd].mode);
	if (from_offset != NULL && !prw) {
		/*
		 * Seekable file types should support pread() / pwrite() and per-fd offset passing.
		 * Otherwise, it's a bug.
		 */
		errno = ENOTSUP;
		res = -1;
		goto unlock;
	}

	/* If there is no specified from_offset, then use the current offset of the fd */
	off = (from_offset == NULL) ? &fdtable[fd].offset : from_offset;

	if (is_write) {
		if (fdtable[fd].vtable->write_offs == NULL) {
			res = -1;
			errno = EIO;
		} else {
			res = fdtable[fd].vtable->write_offs(fdtable[fd].obj, buf, sz, *off);
		}
	} else {
		if (fdtable[fd].vtable->read_offs == NULL) {
			res = -1;
			errno = EIO;
		} else {
			res = fdtable[fd].vtable->read_offs(fdtable[fd].obj, buf, sz, *off);
		}
	}
	if (res > 0 && prw && from_offset == NULL) {
		/*
		 * Only update the fd offset when from_offset is not specified.
		 * See pread() / pwrite().
		 */
		fdtable[fd].offset += res;
	}

unlock:
	k_mutex_unlock(&fdtable[fd].lock);

	return res;
}

ssize_t zvfs_read(int fd, void *buf, size_t sz, const size_t *from_offset)
{
	return zvfs_rw(fd, buf, sz, false, from_offset);
}

ssize_t zvfs_write(int fd, const void *buf, size_t sz, const size_t *from_offset)
{
	return zvfs_rw(fd, (void *)buf, sz, true, from_offset);
}
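
/*
 * Usage sketch (illustrative only): passing NULL as from_offset gives plain
 * read()/write() behaviour that advances the per-fd offset for seekable
 * types, while passing a caller-owned offset gives pread()/pwrite()
 * behaviour that leaves the fd offset untouched; non-NULL offsets are only
 * accepted for object types reported seekable by supports_pread_pwrite().
 *
 *   char buf[16];
 *   size_t pos = 0;
 *
 *   ssize_t n = zvfs_read(fd, buf, sizeof(buf), &pos);
 *   ssize_t m = zvfs_read(fd, buf, sizeof(buf), NULL);
 */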

int zvfs_close(int fd)
{
	int res = 0;

	if (_check_fd(fd) < 0) {
		return -1;
	}

	(void)k_mutex_lock(&fdtable[fd].lock, K_FOREVER);
	if (fdtable[fd].vtable->close != NULL) {
		/* close() is optional - e.g. stdinout_fd_op_vtable */
		if (fdtable[fd].mode & ZVFS_MODE_IFSOCK) {
			/* A network socket needs to know its descriptor number,
			 * so pass it via the close2() call.
			 */
			res = fdtable[fd].vtable->close2(fdtable[fd].obj, fd);
		} else {
			res = fdtable[fd].vtable->close(fdtable[fd].obj);
		}
	}
	k_mutex_unlock(&fdtable[fd].lock);

	zvfs_free_fd(fd);

	return res;
}

FILE *zvfs_fdopen(int fd, const char *mode)
{
	ARG_UNUSED(mode);

	if (_check_fd(fd) < 0) {
		return NULL;
	}

	return (FILE *)&fdtable[fd];
}

int zvfs_fileno(FILE *file)
{
	if (!IS_ARRAY_ELEMENT(fdtable, file)) {
		errno = EBADF;
		return -1;
	}

	return (struct fd_entry *)file - fdtable;
}
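
/*
 * Usage sketch (illustrative only): the FILE pointer returned by
 * zvfs_fdopen() is simply the address of the descriptor's table entry, so
 * zvfs_fileno() can recover the fd by pointer arithmetic and the pair
 * round-trips:
 *
 *   FILE *fp = zvfs_fdopen(fd, "r+");
 *
 *   __ASSERT_NO_MSG(fp != NULL && zvfs_fileno(fp) == fd);
 */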

int zvfs_fstat(int fd, struct stat *buf)
{
	if (_check_fd(fd) < 0) {
		return -1;
	}

	return zvfs_fdtable_call_ioctl(fdtable[fd].vtable, fdtable[fd].obj, ZFD_IOCTL_STAT, buf);
}

int zvfs_fsync(int fd)
{
	if (_check_fd(fd) < 0) {
		return -1;
	}

	return zvfs_fdtable_call_ioctl(fdtable[fd].vtable, fdtable[fd].obj, ZFD_IOCTL_FSYNC);
}

static inline off_t zvfs_lseek_wrap(int fd, int cmd, ...)
{
	off_t res;
	va_list args;

	__ASSERT_NO_MSG(fd < ARRAY_SIZE(fdtable));

	(void)k_mutex_lock(&fdtable[fd].lock, K_FOREVER);
	va_start(args, cmd);
	res = fdtable[fd].vtable->ioctl(fdtable[fd].obj, cmd, args);
	va_end(args);
	if (res >= 0) {
		switch (fdtable[fd].mode & ZVFS_MODE_IFMT) {
		case ZVFS_MODE_IFDIR:
		case ZVFS_MODE_IFBLK:
		case ZVFS_MODE_IFSHM:
		case ZVFS_MODE_IFREG:
			fdtable[fd].offset = res;
			break;
		default:
			break;
		}
	}
	k_mutex_unlock(&fdtable[fd].lock);

	return res;
}

off_t zvfs_lseek(int fd, off_t offset, int whence)
{
	if (_check_fd(fd) < 0) {
		return -1;
	}

	return zvfs_lseek_wrap(fd, ZFD_IOCTL_LSEEK, offset, whence, fdtable[fd].offset);
}
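
/*
 * Sketch of a matching ZFD_IOCTL_LSEEK handler in a backend's ioctl vmethod
 * (hypothetical my_seek(), not part of this file): zvfs_lseek_wrap() packs
 * the requested offset, the whence value and the current per-fd offset into
 * the va_list, and on success the returned position is written back to the
 * table entry for seekable file types.
 *
 *   case ZFD_IOCTL_LSEEK: {
 *           off_t offset = va_arg(args, off_t);
 *           int whence = va_arg(args, int);
 *           size_t cur_offset = va_arg(args, size_t);
 *
 *           return my_seek(obj, offset, whence, cur_offset);
 *   }
 */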

int zvfs_fcntl(int fd, int cmd, va_list args)
{
	int res;

	if (_check_fd(fd) < 0) {
		return -1;
	}

	/* The rest of the commands are per-fd, handled by the ioctl vmethod. */
	res = fdtable[fd].vtable->ioctl(fdtable[fd].obj, cmd, args);

	return res;
}

static inline int zvfs_ftruncate_wrap(int fd, int cmd, ...)
{
	int res;
	va_list args;

	__ASSERT_NO_MSG(fd < ARRAY_SIZE(fdtable));

	(void)k_mutex_lock(&fdtable[fd].lock, K_FOREVER);
	va_start(args, cmd);
	res = fdtable[fd].vtable->ioctl(fdtable[fd].obj, cmd, args);
	va_end(args);
	k_mutex_unlock(&fdtable[fd].lock);

	return res;
}

int zvfs_ftruncate(int fd, off_t length)
{
	if (_check_fd(fd) < 0) {
		return -1;
	}

	return zvfs_ftruncate_wrap(fd, ZFD_IOCTL_TRUNCATE, length);
}

int zvfs_ioctl(int fd, unsigned long request, va_list args)
{
	if (_check_fd(fd) < 0) {
		return -1;
	}

	return fdtable[fd].vtable->ioctl(fdtable[fd].obj, request, args);
}


#if defined(CONFIG_POSIX_DEVICE_IO)
/*
 * fd operations for stdin/stdout/stderr
 */

int z_impl_zephyr_write_stdout(const char *buf, int nbytes);

static ssize_t stdinout_read_vmeth(void *obj, void *buffer, size_t count)
{
	return 0;
}

static ssize_t stdinout_write_vmeth(void *obj, const void *buffer, size_t count)
{
#if defined(CONFIG_NEWLIB_LIBC) || defined(CONFIG_ARCMWDT_LIBC)
	return z_impl_zephyr_write_stdout(buffer, count);
#else
	return 0;
#endif
}

static int stdinout_ioctl_vmeth(void *obj, unsigned int request, va_list args)
{
	errno = EINVAL;
	return -1;
}


static const struct fd_op_vtable stdinout_fd_op_vtable = {
	.read = stdinout_read_vmeth,
	.write = stdinout_write_vmeth,
	.ioctl = stdinout_ioctl_vmeth,
};

#endif /* defined(CONFIG_POSIX_DEVICE_IO) */