1 /*
2 Internal interfaces for Xen Store Daemon.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #ifndef _XENSTORED_CORE_H
20 #define _XENSTORED_CORE_H
21
22 #include <xenctrl.h>
23 #include <xengnttab.h>
24
25 #include <sys/types.h>
26 #include <dirent.h>
27 #include <fcntl.h>
28 #include <poll.h>
29 #include <stdbool.h>
30 #include <stdint.h>
31 #include <time.h>
32 #include <errno.h>
33
34 #include "xenstore_lib.h"
35 #include "xenstore_state.h"
36 #include "list.h"
37 #include "hashtable.h"
38
/*
 * Path built by string literal concatenation: XEN_LIB_DIR (not defined in
 * this header) followed by "/xenstore".
 */
#define XENSTORE_LIB_DIR	XEN_LIB_DIR "/xenstore"
40
/*
 * Fallback for environments whose headers do not provide O_CLOEXEC: define
 * it as 0, so or-ing it into a flags value has no effect.
 * Outside of Mini-OS builds NO_LIVE_UPDATE is defined in that case as well.
 */
#ifndef O_CLOEXEC
#define O_CLOEXEC 0
/* O_CLOEXEC support is needed for Live Update in the daemon case. */
#ifndef __MINIOS__
#define NO_LIVE_UPDATE
#endif
#endif
48
/*
 * DEFAULT_BUFFER_SIZE should be large enough for each errno string.
 * It is the size of the default_buffer[] member of struct buffered_data.
 */
#define DEFAULT_BUFFER_SIZE 16

/* Forward declaration: this header only uses pointers to this type. */
struct xs_state_connection;
53
/*
 * One buffered xenstore message: header, payload and bookkeeping.
 * NOTE(review): apparently used for both incoming requests (member "in" of
 * struct connection) and queued output/watch events -- confirm against the
 * implementation.
 */
struct buffered_data
{
	/* List linkage. */
	struct list_head list;
	/*
	 * NOTE(review): presumably flag membership on a connection's out_list
	 * and ref_list respectively -- confirm.
	 */
	bool on_out_list;
	bool on_ref_list;

	/* Are we still doing the header? */
	bool inhdr;

	/* Is this a watch event? */
	bool watch_event;

	/* How far are we? */
	unsigned int used;

	/* Outstanding request accounting. */
	union {
		/* ref is being used for requests. */
		struct {
			unsigned int event_cnt;	/* # of outstanding events. */
			unsigned int domid;	/* domid of request. */
		} ref;
		/* req is being used for watch events. */
		struct buffered_data *req;	/* request causing event. */
	} pend;

	/* Message header, accessible either as a struct or as raw bytes. */
	union {
		struct xsd_sockmsg msg;
		char raw[sizeof(struct xsd_sockmsg)];
	} hdr;

	/* NOTE(review): a time in milliseconds going by the name -- confirm. */
	uint64_t timeout_msec;

	/* The actual data. */
	char *buffer;
	/*
	 * Embedded storage of DEFAULT_BUFFER_SIZE bytes.
	 * NOTE(review): presumably "buffer" points here for small payloads --
	 * confirm.
	 */
	char default_buffer[DEFAULT_BUFFER_SIZE];
};
91
/*
 * A request whose handling has been postponed, together with the callback
 * that is to process it later.
 */
struct delayed_request {
	/* Next delayed request. */
	struct list_head list;

	/* The delayed request. */
	struct buffered_data *in;

	/*
	 * Function to call.
	 * NOTE(review): the meaning of the bool return value is not visible in
	 * this header -- see the implementation.
	 */
	bool (*func)(struct delayed_request *req);

	/* Further data. */
	void *data;
};
105
struct connection;

/*
 * Transport operations of a connection (see member "funcs" of
 * struct connection): write to / read from the peer, and query whether
 * writing / reading is currently possible.
 * NOTE(review): the return value convention of write() and read() is not
 * visible in this header -- confirm against the implementations.
 */
struct interface_funcs {
	int (*write)(struct connection *, const void *, unsigned int);
	int (*read)(struct connection *, void *, unsigned int);
	bool (*can_write)(struct connection *);
	bool (*can_read)(struct connection *);
};
114
/* State kept for a single connection to the daemon. */
struct connection
{
	/* List linkage. */
	struct list_head list;

	/* The file descriptor we came in on. */
	int fd;
	/* The index of pollfd in global pollfd array */
	int pollfd_idx;

	/* Who am I? Domid of connection. */
	unsigned int id;

	/* Is this connection ignored? */
	bool is_ignored;

	/* Is the connection stalled? */
	bool is_stalled;

	/* Buffered incoming data. */
	struct buffered_data *in;

	/* Buffered output data */
	struct list_head out_list;
	/* NOTE(review): a time in milliseconds going by the name -- confirm. */
	uint64_t timeout_msec;

	/* Not yet committed accounting data (valid if in != NULL). */
	struct list_head acc_list;

	/* Referenced requests no longer pending. */
	struct list_head ref_list;

	/* Transaction context for current request (NULL if none). */
	struct transaction *transaction;

	/* List of in-progress transactions. */
	struct list_head transaction_list;
	uint32_t next_transaction_id;
	time_t ta_start_time;

	/* List of delayed requests. */
	struct list_head delayed;

	/* The domain I'm associated with, if any. */
	struct domain *domain;

	/* The target of the domain I'm associated with. */
	struct connection *target;

	/* My watches. */
	struct list_head watches;

	/* Methods for communicating over this connection. */
	const struct interface_funcs *funcs;

	/* Support for live update: connection id. */
	unsigned int conn_id;
};
/* List head defined outside this header. */
extern struct list_head connections;
173
/*
 * Header of the node record in the data base.
 * In the data base the memory of the node is a single memory chunk with the
 * following format:
 * struct {
 *     node_hdr hdr;
 *     struct xs_permissions perms[hdr.num_perms];
 *     char data[hdr.datalen];
 *     char children[hdr.childlen];
 * };
 * The header fields therefore give the sizes of the three variable length
 * parts following the header.
 */
struct node_hdr {
	uint64_t generation;
/* Generation value with all 64 bits set. */
#define NO_GENERATION ~((uint64_t)0)
	uint16_t num_perms;	/* Number of entries in perms[]. */
	uint16_t datalen;	/* Length of data[] in bytes. */
	uint32_t childlen;	/* Length of children[] in bytes. */
};
192
/* A set of permissions: an array of entries together with its length. */
struct node_perms {
	unsigned int num;		/* Number of entries in p[]. */
	struct xs_permissions *p;	/* The permission entries. */
};
197
/* Accounting information of a node (see member "acc" of struct node). */
struct node_account_data {
	unsigned int domid;
	int memory;		/* -1 if unknown */
};
202
/* In-memory representation of a xenstore node. */
struct node {
	/* Copied to/from data base. */
	struct node_hdr hdr;

	/* Xenstore path. */
	const char *name;
	/* Name used to access data base. */
	const char *db_name;

	/* Parent (optional) */
	struct node *parent;

	/* Permissions. */
	struct xs_permissions *perms;

	/* Contents. */
	void *data;

	/* Children, each nul-terminated. */
	unsigned int childoff;	/* Used by walk_node_tree() internally. */
	char *children;

	/* Allocation information for node currently in store. */
	struct node_account_data acc;
};
228
/* Return the only argument in the input. */
const char *onearg(struct buffered_data *in);

/* Break input into vectors, return the number, fill in up to num of them. */
unsigned int get_strings(struct buffered_data *data,
			 const char *vec[], unsigned int num);
/*
 * NOTE(review): not described in this header; presumably handles the string
 * starting at "offset" within data -- see the implementation for the meaning
 * of the return value.
 */
unsigned int get_string(const struct buffered_data *data, unsigned int offset);

/* Send a reply of the given message type with len bytes of data to conn. */
void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
		const void *data, unsigned int len);
/*
 * Send an event for path/token to conn.
 * NOTE(review): req is presumably the request causing the event (compare
 * pend.req in struct buffered_data) -- confirm.
 */
void send_event(struct buffered_data *req, struct connection *conn,
		const char *path, const char *token);

/* Some routines (write, mkdir, etc) just need a non-error return */
void send_ack(struct connection *conn, enum xsd_sockmsg_type type);

/* Canonicalize this path if possible. */
const char *canonicalize(struct connection *conn, const void *ctx,
			 const char *node, bool allow_special);

/* Get access permissions. */
unsigned int perm_for_conn(struct connection *conn,
			   const struct node_perms *perms);
252
253 /* Get owner of a node. */
get_node_owner(const struct node * node)254 static inline unsigned int get_node_owner(const struct node *node)
255 {
256 return node->perms[0].id;
257 }
258
259 /* Transfer permissions from node to struct node_perms. */
node_to_node_perms(const struct node * node,struct node_perms * perms)260 static inline void node_to_node_perms(const struct node *node,
261 struct node_perms *perms)
262 {
263 perms->num = node->hdr.num_perms;
264 perms->p = node->perms;
265 }
266
perm_for_conn_from_node(struct connection * conn,const struct node * node)267 static inline unsigned int perm_for_conn_from_node(struct connection *conn,
268 const struct node *node)
269 {
270 struct node_perms perms;
271
272 node_to_node_perms(node, &perms);
273
274 return perm_for_conn(conn, &perms);
275 }
276
277 /* Transfer permissions from struct node_perms to node. */
node_perms_to_node(const struct node_perms * perms,struct node * node)278 static inline void node_perms_to_node(const struct node_perms *perms,
279 struct node *node)
280 {
281 node->hdr.num_perms = perms->num;
282 node->perms = perms->p;
283 }
284
/* Write a node to the data base. */
/* Mode of the write: create a new node or modify an existing one. */
enum write_node_mode {
	NODE_CREATE,
	NODE_MODIFY
};

/*
 * Write node to the data base under the name db_name.
 * NOTE(review): no_quota_check presumably disables quota checking for this
 * write -- confirm against the implementation.
 */
int write_node_raw(struct connection *conn, const char *db_name,
		   struct node *node, enum write_node_mode mode,
		   bool no_quota_check);
294
/* Get a node from the data base. */
struct node *read_node(struct connection *conn, const void *ctx,
		       const char *name);
/* Same parameters as read_node(), but the returned node is const. */
const struct node *read_node_const(struct connection *conn, const void *ctx,
				   const char *name);

/* Remove a node and its children. */
int rm_node(struct connection *conn, const void *ctx, const char *name);

void setup_structure(bool live_update);
struct connection *new_connection(const struct interface_funcs *funcs);
struct connection *add_socket_connection(int fd);
/* Look up a connection via its conn_id (see struct connection). */
struct connection *get_connection_by_id(unsigned int conn_id);
void check_store(void);
/*
 * NOTE(review): takes a format string plus variadic arguments, but unlike
 * trace() it carries no printf format attribute here.
 */
void corrupt(struct connection *conn, const char *fmt, ...);

/* Get name of parent node. */
char *get_parent(const void *ctx, const char *node);

/*
 * Delay a request.
 * func and data match the corresponding members of struct delayed_request.
 */
int delay_request(struct connection *conn, struct buffered_data *in,
		  bool (*func)(struct delayed_request *), void *data,
		  bool no_quota_check);
318
/* Tracing infrastructure. */
void trace_create(const void *data, const char *type);
void trace_destroy(const void *data, const char *type);
/* printf-style trace output; the format string is checked by the compiler. */
void trace(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
void reopen_log(void);
void close_log(void);
325
/*
 * Log a printf-style formatted message via trace() and via syslog() with
 * priority LOG_ERR.
 * The message is formatted into a temporary talloc string which is freed
 * again; if that allocation fails, a fixed failure notice is logged instead.
 * errno is saved on entry and restored on exit, so logging does not clobber
 * the caller's error code.
 * The macro-local temporaries carry a "_log_" prefix: with a plain name such
 * as "s", a call like log("%s", s) would have its argument captured by the
 * macro's own, still uninitialized, variable.
 * NOTE(review): the macro name collides with log() from <math.h>; a
 * translation unit needing both has to deal with that conflict.
 */
#define log(...)							\
	do {								\
		int _log_saved_errno = errno;				\
		char *_log_msg = talloc_asprintf(NULL, __VA_ARGS__);	\
		if (_log_msg) {						\
			trace("%s\n", _log_msg);			\
			syslog(LOG_ERR, "%s\n", _log_msg);		\
			talloc_free(_log_msg);				\
		} else {						\
			trace("talloc failure during logging\n");	\
			syslog(LOG_ERR, "talloc failure during logging\n"); \
		}							\
		errno = _log_saved_errno;				\
	} while (0)
340
/* NOTE(review): presumably the original command line of the daemon -- confirm. */
extern int orig_argc;
extern char **orig_argv;

extern const char *tracefile;
extern int tracefd;

/* Trace flag values must be kept in sync with trace_switches[] contents. */
extern unsigned int trace_flags;
/* Bit values for trace_flags. */
#define TRACE_OBJ	0x00000001
#define TRACE_IO	0x00000002
#define TRACE_WRL	0x00000004
#define TRACE_ACC	0x00000008
#define TRACE_TDB	0x00000010
extern const char *const trace_switches[];
int set_trace_switch(const char *arg);

/*
 * Emit a trace message prefixed with "tdb: ", but only if TRACE_TDB is set
 * in trace_flags. The first argument must be a string literal, as it is
 * concatenated with the prefix.
 */
#define trace_tdb(...)				\
do {						\
	if (trace_flags & TRACE_TDB)		\
		trace("tdb: " __VA_ARGS__);	\
} while (0)
362
/*
 * Global settings defined outside this header.
 * dom0_domid and priv_domid are the domids treated as privileged by
 * domid_is_unprivileged().
 */
extern int dom0_domid;
extern int dom0_event;
extern int priv_domid;
extern domid_t stub_domid;
extern bool keep_orphans;

/* NOTE(review): presumably the global pollfd array indexed by pollfd_idx of struct connection -- confirm. */
extern struct pollfd *poll_fds;

extern unsigned int timeout_watch_event_msec;

/* Get internal time in milliseconds. */
uint64_t get_now_msec(void);

/* Map the kernel's xenstore page. */
void *xenbus_map(void);
void unmap_xenbus(void *interface);
379
xenbus_master_domid(void)380 static inline int xenbus_master_domid(void) { return dom0_domid; }
381
domid_is_unprivileged(unsigned int domid)382 static inline bool domid_is_unprivileged(unsigned int domid)
383 {
384 return domid != dom0_domid && domid != priv_domid;
385 }
386
domain_is_unprivileged(const struct connection * conn)387 static inline bool domain_is_unprivileged(const struct connection *conn)
388 {
389 return conn && domid_is_unprivileged(conn->id);
390 }
391
/* Return the event channel used by xenbus. */
evtchn_port_t get_xenbus_evtchn(void);
/*
 * Initialization hooks.
 * NOTE(review): judging by the names, early_init() runs before and
 * late_init() after the main setup -- confirm the call order in the daemon.
 */
void early_init(bool live_update, bool dofork, const char *pidfile);
void late_init(bool live_update);

int set_fd(int fd, short events);
void set_special_fds(void);
void handle_special_fds(void);

int get_socket_fd(void);
void set_socket_fd(int fd);

/* Only available in Mini-OS builds. */
#ifdef __MINIOS__
void mount_9pfs(void);
#endif

const char *xenstore_rundir(void);
const char *absolute_filename(const void *ctx, const char *filename);

/* Close stdin/stdout/stderr to complete daemonize */
void finish_daemonize(void);

extern xengnttab_handle **xgt_handle;

int remember_string(struct hashtable *hash, const char *str);
417
/* Data base access functions. */
/*
 * Fetch the record stored under db_name.
 * NOTE(review): *size presumably receives the size of the record -- confirm.
 */
const struct node_hdr *db_fetch(const char *db_name, size_t *size);
/* Write size bytes of data as the record db_name. */
int db_write(struct connection *conn, const char *db_name, void *data,
	     size_t size, struct node_account_data *acc,
	     enum write_node_mode mode, bool no_quota_check);
/* Delete the record called name. */
void db_delete(struct connection *conn, const char *name,
	       struct node_account_data *acc);

void conn_free_buffered_data(struct connection *conn);

/*
 * State dump functions writing to fp.
 * NOTE(review): the returned string is presumably an error description with
 * NULL meaning success -- confirm against the implementation.
 */
const char *dump_state_global(FILE *fp);
const char *dump_state_buffered_data(FILE *fp, const struct connection *c,
				     struct xs_state_connection *sc);
const char *dump_state_nodes(FILE *fp, const void *ctx);
const char *dump_state_node_perms(FILE *fp, const struct xs_permissions *perms,
				  unsigned int n_perms);

/* Counterparts of the dump functions, reading state back in. */
void read_state_global(const void *ctx, const void *state);
void read_state_buffered_data(const void *ctx, struct connection *conn,
			      const struct xs_state_connection *sc);
void read_state_node(const void *ctx, const void *state);
439
/*
 * Walk the node tree below root calling funcs->enter() and funcs->exit() for
 * each node. funcs->enter() is called when entering a node, i.e. before any
 * of the children of the node are processed. funcs->exit() is called when
 * leaving the node, i.e. after all children have been processed.
 * funcs->enoent() is called when a node doesn't exist.
 * funcs->*() return values:
 *  < 0: tree walk is stopped, walk_node_tree() returns the funcs->*() return
 *       value; in case WALK_TREE_ERROR_STOP is returned, errno should be set
 *  WALK_TREE_OK: tree walk continues
 *  WALK_TREE_SKIP_CHILDREN: tree walk won't descend below the current node,
 *       but the walk continues
 *  WALK_TREE_RM_CHILDENTRY: remove the child entry from its parent and write
 *       the modified parent node back to the data base; implies not
 *       descending below the current node, but continuing the walk
 * funcs->*() is allowed to modify the node it is called for in the data base.
 * In case funcs->enter() deletes the node, it must not return WALK_TREE_OK,
 * in order to avoid descending into children which no longer exist.
 */
/* Return values for funcs->*() and walk_node_tree(). */
#define WALK_TREE_SUCCESS_STOP	-100	/* Stop walk early, no error. */
#define WALK_TREE_ERROR_STOP	-1	/* Stop walk due to error. */
#define WALK_TREE_OK		0	/* No error. */
/* Return value for funcs->*() only. */
#define WALK_TREE_SKIP_CHILDREN	1	/* Don't recurse below current node. */
#define WALK_TREE_RM_CHILDENTRY	2	/* Remove child entry from parent. */
466
/*
 * Callbacks used by walk_node_tree(). All of them receive the ctx, conn and
 * arg values handed to the walk and return one of the WALK_TREE_* values.
 */
struct walk_funcs {
	/* Called when entering a node, before its children are processed. */
	int (*enter)(const void *ctx, struct connection *conn,
		     struct node *node, void *arg);
	/* Called when leaving a node, after its children have been processed. */
	int (*exit)(const void *ctx, struct connection *conn,
		    struct node *node, void *arg);
	/* Called when a node doesn't exist; gets the parent and the node's name. */
	int (*enoent)(const void *ctx, struct connection *conn,
		      struct node *parent, char *name, void *arg);
};
475
/*
 * Walk the node tree below root, calling the callbacks in funcs with arg.
 * Returns one of WALK_TREE_SUCCESS_STOP, WALK_TREE_ERROR_STOP or
 * WALK_TREE_OK, or the negative value by which a callback stopped the walk.
 */
int walk_node_tree(const void *ctx, struct connection *conn, const char *root,
		   struct walk_funcs *funcs, void *arg);
478
479 #endif /* _XENSTORED_CORE_H */
480
481 /*
482 * Local variables:
483 * mode: C
484 * c-file-style: "linux"
485 * indent-tabs-mode: t
486 * c-basic-offset: 8
487 * tab-width: 8
488 * End:
489 */
490