1 /*
2     Internal interfaces for Xen Store Daemon.
3     Copyright (C) 2005 Rusty Russell IBM Corporation
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #ifndef _XENSTORED_CORE_H
20 #define _XENSTORED_CORE_H
21 
22 #include <xenctrl.h>
23 #include <xengnttab.h>
24 
25 #include <sys/types.h>
26 #include <dirent.h>
27 #include <fcntl.h>
28 #include <poll.h>
29 #include <stdbool.h>
30 #include <stdint.h>
31 #include <time.h>
32 #include <errno.h>
33 
34 #include "xenstore_lib.h"
35 #include "xenstore_state.h"
36 #include "list.h"
37 #include "hashtable.h"
38 
/* Xenstore specific sub-directory below the Xen library directory. */
#define XENSTORE_LIB_DIR	XEN_LIB_DIR "/xenstore"

/*
 * Fallback for build environments where O_CLOEXEC is not defined: the flag
 * is turned into a no-op value of 0.
 */
#if !defined(O_CLOEXEC)
#define O_CLOEXEC 0
/* O_CLOEXEC support is needed for Live Update in the daemon case. */
#if !defined(__MINIOS__)
#define NO_LIVE_UPDATE
#endif /* !__MINIOS__ */
#endif /* !O_CLOEXEC */
48 
49 /* DEFAULT_BUFFER_SIZE should be large enough for each errno string. */
50 #define DEFAULT_BUFFER_SIZE 16
51 
52 struct xs_state_connection;
53 
54 struct buffered_data
55 {
56 	struct list_head list;
57 	bool on_out_list;
58 	bool on_ref_list;
59 
60 	/* Are we still doing the header? */
61 	bool inhdr;
62 
63 	/* Is this a watch event? */
64 	bool watch_event;
65 
66 	/* How far are we? */
67 	unsigned int used;
68 
69 	/* Outstanding request accounting. */
70 	union {
71 		/* ref is being used for requests. */
72 		struct {
73 			unsigned int event_cnt; /* # of outstanding events. */
74 			unsigned int domid;     /* domid of request. */
75 		} ref;
76 		/* req is being used for watch events. */
77 		struct buffered_data *req;      /* request causing event. */
78 	} pend;
79 
80 	union {
81 		struct xsd_sockmsg msg;
82 		char raw[sizeof(struct xsd_sockmsg)];
83 	} hdr;
84 
85 	uint64_t timeout_msec;
86 
87 	/* The actual data. */
88 	char *buffer;
89 	char default_buffer[DEFAULT_BUFFER_SIZE];
90 };
91 
92 struct delayed_request {
93 	/* Next delayed request. */
94 	struct list_head list;
95 
96 	/* The delayed request. */
97 	struct buffered_data *in;
98 
99 	/* Function to call. */
100 	bool (*func)(struct delayed_request *req);
101 
102 	/* Further data. */
103 	void *data;
104 };
105 
struct connection;

/*
 * Transport methods of a connection (see struct connection, member funcs).
 * NOTE(review): return value conventions of write()/read() are not stated
 * in this header - confirm with the implementations.
 */
struct interface_funcs {
	/* Write up to len bytes from buf to the connection. */
	int (*write)(struct connection *conn, const void *buf,
		     unsigned int len);
	/* Read up to len bytes from the connection into buf. */
	int (*read)(struct connection *conn, void *buf, unsigned int len);
	/* Is writing to the connection possible? */
	bool (*can_write)(struct connection *conn);
	/* Is reading from the connection possible? */
	bool (*can_read)(struct connection *conn);
};
114 
115 struct connection
116 {
117 	struct list_head list;
118 
119 	/* The file descriptor we came in on. */
120 	int fd;
121 	/* The index of pollfd in global pollfd array */
122 	int pollfd_idx;
123 
124 	/* Who am I? Domid of connection. */
125 	unsigned int id;
126 
127 	/* Is this connection ignored? */
128 	bool is_ignored;
129 
130 	/* Is the connection stalled? */
131 	bool is_stalled;
132 
133 	/* Buffered incoming data. */
134 	struct buffered_data *in;
135 
136 	/* Buffered output data */
137 	struct list_head out_list;
138 	uint64_t timeout_msec;
139 
140 	/* Not yet committed accounting data (valid if in != NULL). */
141 	struct list_head acc_list;
142 
143 	/* Referenced requests no longer pending. */
144 	struct list_head ref_list;
145 
146 	/* Transaction context for current request (NULL if none). */
147 	struct transaction *transaction;
148 
149 	/* List of in-progress transactions. */
150 	struct list_head transaction_list;
151 	uint32_t next_transaction_id;
152 	time_t ta_start_time;
153 
154 	/* List of delayed requests. */
155 	struct list_head delayed;
156 
157 	/* The domain I'm associated with, if any. */
158 	struct domain *domain;
159 
160         /* The target of the domain I'm associated with. */
161         struct connection *target;
162 
163 	/* My watches. */
164 	struct list_head watches;
165 
166 	/* Methods for communicating over this connection. */
167 	const struct interface_funcs *funcs;
168 
169 	/* Support for live update: connection id. */
170 	unsigned int conn_id;
171 };
172 extern struct list_head connections;
173 
/*
 * Header of the node record in the data base.
 * In the data base the memory of the node is a single memory chunk with the
 * following format:
 * struct {
 *     struct node_hdr hdr;
 *     struct xs_permissions perms[hdr.num_perms];
 *     char data[hdr.datalen];
 *     char children[hdr.childlen];
 * };
 */
struct node_hdr {
	/* Generation of the node; NO_GENERATION is the all-ones value. */
	uint64_t generation;
/*
 * The replacement list is fully parenthesized so the macro behaves like a
 * single operand wherever it is expanded.
 */
#define NO_GENERATION (~(uint64_t)0)
	/* Number of struct xs_permissions entries following the header. */
	uint16_t num_perms;
	/* Length of the data part in bytes. */
	uint16_t datalen;
	/* Length of the children part in bytes. */
	uint32_t childlen;
};
192 
/* A permission array together with its number of entries. */
struct node_perms {
	/* Number of entries in p. */
	unsigned int num;
	/* The permission entries. */
	struct xs_permissions *p;
};
197 
/* Accounting information of a node. */
struct node_account_data {
	unsigned int domid;
	/* Memory accounted for the node, -1 if unknown. */
	int memory;
};
202 
203 struct node {
204 	/* Copied to/from data base. */
205 	struct node_hdr hdr;
206 
207 	/* Xenstore path. */
208 	const char *name;
209 	/* Name used to access data base. */
210 	const char *db_name;
211 
212 	/* Parent (optional) */
213 	struct node *parent;
214 
215 	/* Permissions. */
216 	struct xs_permissions *perms;
217 
218 	/* Contents. */
219 	void *data;
220 
221 	/* Children, each nul-terminated. */
222 	unsigned int childoff;	/* Used by walk_node_tree() internally. */
223 	char *children;
224 
225 	/* Allocation information for node currently in store. */
226 	struct node_account_data acc;
227 };
228 
/* Return the only argument in the input. */
const char *onearg(struct buffered_data *in);

/*
 * Break the input into vectors: returns their number and fills in up to
 * num entries of vec.
 */
unsigned int get_strings(struct buffered_data *data, const char *vec[],
			 unsigned int num);
/*
 * Not described in this header; see the definition for the meaning of
 * offset and of the return value.
 */
unsigned int get_string(const struct buffered_data *data,
			unsigned int offset);

/* Send a reply of the given type with len bytes of data to conn. */
void send_reply(struct connection *conn,
		enum xsd_sockmsg_type type,
		const void *data,
		unsigned int len);
/* Send an event for path and token to conn; req is the related request. */
void send_event(struct buffered_data *req,
		struct connection *conn,
		const char *path,
		const char *token);

/* Some routines (write, mkdir, etc) just need a non-error return. */
void send_ack(struct connection *conn, enum xsd_sockmsg_type type);

/* Canonicalize the path given in node, if possible. */
const char *canonicalize(struct connection *conn,
			 const void *ctx,
			 const char *node,
			 bool allow_special);

/* Get the access permissions conn has according to perms. */
unsigned int perm_for_conn(struct connection *conn,
			   const struct node_perms *perms);
252 
253 /* Get owner of a node. */
get_node_owner(const struct node * node)254 static inline unsigned int get_node_owner(const struct node *node)
255 {
256 	return node->perms[0].id;
257 }
258 
259 /* Transfer permissions from node to struct node_perms. */
node_to_node_perms(const struct node * node,struct node_perms * perms)260 static inline void node_to_node_perms(const struct node *node,
261 				      struct node_perms *perms)
262 {
263 	perms->num = node->hdr.num_perms;
264 	perms->p = node->perms;
265 }
266 
perm_for_conn_from_node(struct connection * conn,const struct node * node)267 static inline unsigned int perm_for_conn_from_node(struct connection *conn,
268 						   const struct node *node)
269 {
270 	struct node_perms perms;
271 
272 	node_to_node_perms(node, &perms);
273 
274 	return perm_for_conn(conn, &perms);
275 }
276 
277 /* Transfer permissions from struct node_perms to node. */
node_perms_to_node(const struct node_perms * perms,struct node * node)278 static inline void node_perms_to_node(const struct node_perms *perms,
279 				      struct node *node)
280 {
281 	node->hdr.num_perms = perms->num;
282 	node->perms = perms->p;
283 }
284 
/* Mode of writing a node to the data base. */
enum write_node_mode {
	NODE_CREATE = 0,
	NODE_MODIFY = 1
};

/* Write a node to the data base, using db_name as name in the data base. */
int write_node_raw(struct connection *conn,
		   const char *db_name,
		   struct node *node,
		   enum write_node_mode mode,
		   bool no_quota_check);
294 
/* Get a node from the data base. */
struct node *read_node(struct connection *conn,
		       const void *ctx,
		       const char *name);
/* Same parameters as read_node(), but the returned node is const. */
const struct node *read_node_const(struct connection *conn,
				   const void *ctx,
				   const char *name);

/* Remove a node and its children. */
int rm_node(struct connection *conn, const void *ctx, const char *name);
303 
/*
 * Setup, connection management and consistency helpers.
 * NOTE(review): these are not described in this header; see the
 * definitions for details.
 */
void setup_structure(bool live_update);
struct connection *new_connection(const struct interface_funcs *funcs);
struct connection *add_socket_connection(int fd);
struct connection *get_connection_by_id(unsigned int conn_id);
void check_store(void);
void corrupt(struct connection *conn, const char *fmt, ...);

/* Get the name of the parent node of node. */
char *get_parent(const void *ctx, const char *node);

/* Delay a request: func and data are stored for the delayed request. */
int delay_request(struct connection *conn,
		  struct buffered_data *in,
		  bool (*func)(struct delayed_request *req),
		  void *data,
		  bool no_quota_check);
318 
/* Tracing infrastructure. */
void trace_create(const void *data, const char *type);
void trace_destroy(const void *data, const char *type);
/* printf-like trace output; the format string is checked by the compiler. */
void trace(const char *fmt, ...)
	__attribute__((format(printf, 1, 2)));
void reopen_log(void);
void close_log(void);
325 
/*
 * Log a printf-style formatted message via trace() and via syslog() with
 * priority LOG_ERR. errno is preserved across the macro.
 * If formatting the message fails, a fixed "talloc failure" text is logged
 * instead.
 * The macro's local variables carry log-specific names in order not to
 * capture identifiers used in the arguments: with a local named "s" a call
 * like log("%s", s) would format the macro's own uninitialized variable.
 * NOTE(review): syslog() and talloc_asprintf() must be declared where the
 * macro is used; this header does not include <syslog.h> or the talloc
 * header itself.
 */
#define log(...)							\
	do {								\
		int _log_errno = errno;					\
		char *_log_msg = talloc_asprintf(NULL, __VA_ARGS__);	\
		if (_log_msg) {						\
			trace("%s\n", _log_msg);			\
			syslog(LOG_ERR, "%s\n",	_log_msg);		\
			talloc_free(_log_msg);				\
		} else {						\
			trace("talloc failure during logging\n");	\
			syslog(LOG_ERR, "talloc failure during logging\n"); \
		}							\
		errno = _log_errno;					\
	} while (0)
340 
/* NOTE(review): presumably the daemon's original command line - confirm. */
extern int orig_argc;
extern char **orig_argv;

extern const char *tracefile;
extern int tracefd;

/*
 * Bit mask of the enabled trace switches.
 * The flag values must be kept in sync with the contents of
 * trace_switches[].
 */
extern unsigned int trace_flags;
#define TRACE_OBJ	0x00000001
#define TRACE_IO	0x00000002
#define TRACE_WRL	0x00000004
#define TRACE_ACC	0x00000008
#define TRACE_TDB	0x00000010
extern const char *const trace_switches[];
int set_trace_switch(const char *arg);

/*
 * Emit a trace message prefixed with "tdb: ", but only if TRACE_TDB is set
 * in trace_flags. The first argument must be a string literal, as the
 * prefix is added via string literal concatenation.
 */
#define trace_tdb(...)					\
	do {						\
		if (trace_flags & TRACE_TDB) {		\
			trace("tdb: " __VA_ARGS__);	\
		}					\
	} while (0)
362 
363 extern int dom0_domid;
364 extern int dom0_event;
365 extern int priv_domid;
366 extern domid_t stub_domid;
367 extern bool keep_orphans;
368 
369 extern struct pollfd *poll_fds;
370 
371 extern unsigned int timeout_watch_event_msec;
372 
/* Get the internal time in milliseconds. */
uint64_t get_now_msec(void);

/* Map the kernel's xenstore page. */
void *xenbus_map(void);
/* Counterpart of xenbus_map(): unmap the interface page. */
void unmap_xenbus(void *interface);
379 
xenbus_master_domid(void)380 static inline int xenbus_master_domid(void) { return dom0_domid; }
381 
domid_is_unprivileged(unsigned int domid)382 static inline bool domid_is_unprivileged(unsigned int domid)
383 {
384 	return domid != dom0_domid && domid != priv_domid;
385 }
386 
domain_is_unprivileged(const struct connection * conn)387 static inline bool domain_is_unprivileged(const struct connection *conn)
388 {
389 	return conn && domid_is_unprivileged(conn->id);
390 }
391 
392 /* Return the event channel used by xenbus. */
393 evtchn_port_t get_xenbus_evtchn(void);
394 void early_init(bool live_update, bool dofork, const char *pidfile);
395 void late_init(bool live_update);
396 
397 int set_fd(int fd, short events);
398 void set_special_fds(void);
399 void handle_special_fds(void);
400 
401 int get_socket_fd(void);
402 void set_socket_fd(int fd);
403 
404 #ifdef __MINIOS__
405 void mount_9pfs(void);
406 #endif
407 
408 const char *xenstore_rundir(void);
409 const char *absolute_filename(const void *ctx, const char *filename);
410 
411 /* Close stdin/stdout/stderr to complete daemonize */
412 void finish_daemonize(void);
413 
414 extern xengnttab_handle **xgt_handle;
415 
416 int remember_string(struct hashtable *hash, const char *str);
417 
/*
 * Data base access functions.
 * NOTE(review): ownership of the record returned by db_fetch() and the
 * return value convention of db_write() are not stated here - confirm
 * with the definitions.
 */
const struct node_hdr *db_fetch(const char *db_name, size_t *size);
int db_write(struct connection *conn,
	     const char *db_name,
	     void *data,
	     size_t size,
	     struct node_account_data *acc,
	     enum write_node_mode mode,
	     bool no_quota_check);
void db_delete(struct connection *conn,
	       const char *name,
	       struct node_account_data *acc);
425 
void conn_free_buffered_data(struct connection *conn);

/*
 * Writing state to fp.
 * NOTE(review): the meaning of the returned string is not stated in this
 * header (presumably an error indication) - confirm with the definitions.
 */
const char *dump_state_global(FILE *fp);
const char *dump_state_buffered_data(FILE *fp,
				     const struct connection *c,
				     struct xs_state_connection *sc);
const char *dump_state_nodes(FILE *fp, const void *ctx);
const char *dump_state_node_perms(FILE *fp,
				  const struct xs_permissions *perms,
				  unsigned int n_perms);

/* Reading state back, the counterparts of the dump functions above. */
void read_state_global(const void *ctx, const void *state);
void read_state_buffered_data(const void *ctx,
			      struct connection *conn,
			      const struct xs_state_connection *sc);
void read_state_node(const void *ctx, const void *state);
439 
/*
 * Walk the node tree below root calling funcs->enter() and funcs->exit() for
 * each node. funcs->enter() is called when entering a node, i.e. before any
 * of the children of the node is processed. funcs->exit() is called when
 * leaving the node, i.e. after all children have been processed.
 * funcs->enoent() is called when a node doesn't exist.
 * funcs->*() return values:
 *  < 0: tree walk is stopped, walk_node_tree() returns funcs->*() return value
 *       in case WALK_TREE_ERROR_STOP is returned, errno should be set
 *  WALK_TREE_OK: tree walk is continuing
 *  WALK_TREE_SKIP_CHILDREN: tree walk won't descend below current node, but
 *       walk continues
 *  WALK_TREE_RM_CHILDENTRY: Remove the child entry from its parent and write
 *       the modified parent node back to the data base, implies to not descend
 *       below the current node, but to continue the walk
 * funcs->*() is allowed to modify the node it is called for in the data base.
 * In case funcs->enter() is deleting the node, it must not return WALK_TREE_OK
 * in order to avoid descending into no longer existing children.
 */
/*
 * Return values for funcs->*() and walk_node_tree().
 * Negative values are parenthesized so each macro expands to a single
 * operand regardless of the surrounding expression.
 */
#define WALK_TREE_SUCCESS_STOP  (-100)  /* Stop walk early, no error. */
#define WALK_TREE_ERROR_STOP    (-1)    /* Stop walk due to error. */
#define WALK_TREE_OK            0       /* No error. */
/* Return value for funcs->*() only. */
#define WALK_TREE_SKIP_CHILDREN 1       /* Don't recurse below current node. */
#define WALK_TREE_RM_CHILDENTRY 2       /* Remove child entry from parent. */
466 
/*
 * Callbacks used by walk_node_tree(). Each callback receives the ctx, conn
 * and arg parameters which were passed to walk_node_tree().
 */
struct walk_funcs {
	/* Called when entering node. */
	int (*enter)(const void *ctx,
		     struct connection *conn,
		     struct node *node,
		     void *arg);
	/* Called when leaving node. */
	int (*exit)(const void *ctx,
		    struct connection *conn,
		    struct node *node,
		    void *arg);
	/* Called for a node "name" below parent which doesn't exist. */
	int (*enoent)(const void *ctx,
		      struct connection *conn,
		      struct node *parent,
		      char *name,
		      void *arg);
};

/* Walk the node tree below root, calling the callbacks in funcs. */
int walk_node_tree(const void *ctx,
		   struct connection *conn,
		   const char *root,
		   struct walk_funcs *funcs,
		   void *arg);
478 
479 #endif /* _XENSTORED_CORE_H */
480 
481 /*
482  * Local variables:
483  *  mode: C
484  *  c-file-style: "linux"
485  *  indent-tabs-mode: t
486  *  c-basic-offset: 8
487  *  tab-width: 8
488  * End:
489  */
490