1 /*
2 * This library is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU Lesser General Public
4 * License as published by the Free Software Foundation;
5 * version 2.1 of the License.
6 *
7 * This library is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * Lesser General Public License for more details.
11 *
12 * You should have received a copy of the GNU Lesser General Public
13 * License along with this library; If not, see <http://www.gnu.org/licenses/>.
14 *
 * Split out from xc_linux_osdep.c:
16 *
17 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
18 */
19
20 #include <alloca.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <unistd.h>
24 #include <string.h>
25
26 #include <sys/mman.h>
27 #include <sys/ioctl.h>
28 #include <xen-tools/common-macros.h>
29
30 #include "private.h"
31
32 #ifndef O_CLOEXEC
33 #define O_CLOEXEC 0
34 #endif
35
/*
 * Open the privileged command device and record the descriptor in @fmem.
 *
 * /dev/xen/privcmd is tried first; if that open fails with ENOENT, ENXIO
 * or ENODEV, /proc/xen/privcmd is tried instead.  The errno produced by a
 * deliberately unimplemented ioctl is then stored in fmem->unimpl_errno.
 *
 * Returns 0 on success (fmem->fd set, errno reset to 0), -1 on failure.
 */
int osdep_xenforeignmemory_open(xenforeignmemory_handle *fmem)
{
    /* prefer this newer interface */
    int fd = open("/dev/xen/privcmd", O_RDWR|O_CLOEXEC);

    if ( fd == -1 )
    {
        switch ( errno )
        {
        case ENOENT:
        case ENXIO:
        case ENODEV:
            /* Fallback to /proc/xen/privcmd */
            fd = open("/proc/xen/privcmd", O_RDWR|O_CLOEXEC);
            break;

        default:
            break;
        }
    }

    if ( fd == -1 )
    {
        PERROR("Could not obtain handle on privileged command interface");
        return -1;
    }

    /*
     * Older versions of privcmd return -EINVAL for unimplemented ioctls
     * so we need to probe for the errno to use rather than just using
     * the conventional ENOTTY.
     */
    if ( ioctl(fd, IOCTL_PRIVCMD_UNIMPLEMENTED, NULL) >= 0 )
    {
        /* The probe ioctl must fail; success means the interface is odd. */
        xtl_log(fmem->logger, XTL_ERROR, -1, "xenforeignmemory",
                "privcmd ioctl should not be implemented");
        close(fd);
        return -1;
    }

    /* Remember how this kernel reports "not implemented". */
    fmem->unimpl_errno = errno;
    errno = 0;

    fmem->fd = fd;
    return 0;
}
76
/*
 * Close the descriptor held in @fmem.
 *
 * A descriptor value of -1 is treated as "nothing to close" and reported
 * as success; otherwise the result of close() is returned.
 */
int osdep_xenforeignmemory_close(xenforeignmemory_handle *fmem)
{
    return ( fmem->fd == -1 ) ? 0 : close(fmem->fd);
}
84
/*
 * Map the single frame *@mfn of domain @dom at @addr using
 * IOCTL_PRIVCMD_MMAPBATCH, retrying for as long as the ioctl fails
 * with ENOENT.
 *
 * Before every attempt PRIVCMD_MMAPBATCH_PAGED_ERROR is XOR-ed into *@mfn
 * and the caller is put to sleep for 100 microseconds.
 *
 * Returns the result of the last ioctl (errno is left as that call set it).
 */
static int map_foreign_batch_single(int fd, uint32_t dom,
                                    xen_pfn_t *mfn, unsigned long addr)
{
    privcmd_mmapbatch_t req;
    int ret;

    req.num = 1;
    req.dom = dom;
    req.addr = addr;
    req.arr = mfn;

    for ( ; ; )
    {
        /* Toggle the paged-error marker before (re)submitting the frame. */
        *mfn ^= PRIVCMD_MMAPBATCH_PAGED_ERROR;
        usleep(100);

        ret = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &req);

        /* Only an ENOENT failure warrants another attempt. */
        if ( ret >= 0 || errno != ENOENT )
            break;
    }

    return ret;
}
106
107 /*
108 * Retry mmap of all paged gfns in batches
109 * retuns < 0 on fatal error
110 * returns 0 if all gfns left paging state
111 * returns > 0 if some gfns are still in paging state
112 *
113 * Walk all gfns and try to assemble blocks of gfns in paging state.
114 * This will keep the request ring full and avoids delays.
115 */
retry_paged(int fd,uint32_t dom,void * addr,const xen_pfn_t * arr,int * err,size_t num)116 static int retry_paged(int fd, uint32_t dom, void *addr,
117 const xen_pfn_t *arr, int *err, size_t num)
118 {
119 privcmd_mmapbatch_v2_t ioctlx;
120 int rc, paged = 0;
121 size_t i = 0;
122
123 do
124 {
125 /* Skip gfns not in paging state */
126 if ( err[i] != -ENOENT )
127 {
128 i++;
129 continue;
130 }
131
132 paged++;
133
134 /* At least one gfn is still in paging state */
135 ioctlx.num = 1;
136 ioctlx.dom = dom;
137 ioctlx.addr = (unsigned long)addr + (i<<XC_PAGE_SHIFT);
138 ioctlx.arr = arr + i;
139 ioctlx.err = err + i;
140
141 /* Assemble a batch of requests */
142 while ( ++i < num )
143 {
144 if ( err[i] != -ENOENT )
145 break;
146 ioctlx.num++;
147 }
148
149 /* Send request and abort on fatal error */
150 rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);
151 if ( rc < 0 && errno != ENOENT )
152 goto out;
153
154 } while ( i < num );
155
156 rc = paged;
157 out:
158 return rc;
159 }
160
/*
 * Legacy mapping path, used when privcmd rejects
 * IOCTL_PRIVCMD_MMAPBATCH_V2 with EINVAL.
 *
 * IOCTL_PRIVCMD_MMAPBATCH reports per-frame results through the frame
 * array itself, so @arr (which is const) is copied into a scratch buffer
 * first.  Afterwards each scratch entry is compared with the caller's
 * value to fill in @err; entries differing by exactly
 * PRIVCMD_MMAPBATCH_PAGED_ERROR are retried one at a time when the batch
 * ioctl failed with ENOENT.
 *
 * Takes @fmem rather than a bare fd so that PERROR() has the handle in
 * scope.
 *
 * Returns 0 on success, or -1 with errno set.  The mapping at @addr is
 * left untouched either way; tearing it down on failure is the caller's
 * job.
 */
static int map_foreign_batch_fallback(xenforeignmemory_handle *fmem,
                                      uint32_t dom, void *addr, size_t num,
                                      const xen_pfn_t arr[/*num*/],
                                      int err[/*num*/])
{
    int fd = fmem->fd;
    privcmd_mmapbatch_t ioctlx;
    xen_pfn_t *pfn;
    unsigned int pfn_arr_size = ROUNDUP((num * sizeof(*pfn)), XC_PAGE_SHIFT);
    int os_page_size = sysconf(_SC_PAGESIZE);
    size_t i;
    int rc;

    /* Small scratch arrays go on the stack, larger ones get a mapping. */
    if ( pfn_arr_size <= os_page_size )
        pfn = alloca(num * sizeof(*pfn));
    else
    {
        pfn = mmap(NULL, pfn_arr_size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANON | MAP_POPULATE, -1, 0);
        if ( pfn == MAP_FAILED )
        {
            /* Logging must not disturb the errno reported to the caller. */
            int saved_errno = errno;

            PERROR("mmap of pfn array failed");
            errno = saved_errno;
            return -1;
        }
    }

    memcpy(pfn, arr, num * sizeof(*arr));

    ioctlx.num = num;
    ioctlx.dom = dom;
    ioctlx.addr = (unsigned long)addr;
    ioctlx.arr = pfn;

    rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx);

    /* From here on rc is 0 or a negative errno value. */
    rc = rc < 0 ? -errno : 0;

    for ( i = 0; i < num; ++i )
    {
        xen_pfn_t changed = pfn[i] ^ arr[i];

        /* Entry untouched: it shares the fate of the batch as a whole. */
        if ( changed == 0 )
        {
            err[i] = rc != -ENOENT ? rc : 0;
            continue;
        }

        /* Entry modified in some way other than the paged marker. */
        if ( changed != PRIVCMD_MMAPBATCH_PAGED_ERROR )
        {
            err[i] = -EINVAL;
            continue;
        }

        /* Paged marker set, but the batch did not fail with ENOENT. */
        if ( rc != -ENOENT )
        {
            err[i] = rc ?: -EINVAL;
            continue;
        }

        /* Paged marker set and batch failed with ENOENT: retry this one. */
        rc = map_foreign_batch_single(fd, dom, pfn + i,
                (unsigned long)addr + (i<<XC_PAGE_SHIFT));
        if ( rc < 0 )
        {
            /* Fatal: stop early, leaving i < num. */
            rc = -errno;
            break;
        }

        /*
         * The retry succeeded, so report success for this entry instead of
         * leaving whatever the caller's err[] happened to contain.
         */
        err[i] = 0;

        /* Keep treating later paged entries as retryable. */
        rc = -ENOENT;
    }

    if ( pfn_arr_size > os_page_size )
        munmap(pfn, pfn_arr_size);

    /* Every entry processed and only paging was ever reported: success. */
    if ( rc == -ENOENT && i == num )
        return 0;

    if ( rc )
    {
        errno = -rc;
        return -1;
    }

    return 0;
}

/*
 * Map @num frames of domain @dom, listed in @arr, into this process.
 *
 * The address range is reserved by mmap()ing the privcmd descriptor and
 * then populated with IOCTL_PRIVCMD_MMAPBATCH_V2.  If that ioctl fails
 * with ENOENT, the paged entries are retried every 100 microseconds until
 * none remain or a fatal error occurs.  If it fails with EINVAL (and @num,
 * viewed as an int, is positive) the legacy IOCTL_PRIVCMD_MMAPBATCH path
 * is used instead.
 *
 * @err receives one status per frame.
 *
 * Returns the mapped address, or NULL on failure with errno preserved
 * from the failing operation; on failure the reserved range is unmapped.
 */
void *osdep_xenforeignmemory_map(xenforeignmemory_handle *fmem,
                                 uint32_t dom, void *addr,
                                 int prot, int flags, size_t num,
                                 const xen_pfn_t arr[/*num*/], int err[/*num*/])
{
    int fd = fmem->fd;
    privcmd_mmapbatch_v2_t ioctlx;
    int rc;

    addr = mmap(addr, num << XC_PAGE_SHIFT, prot, flags | MAP_SHARED,
                fd, 0);
    if ( addr == MAP_FAILED )
        return NULL;

    ioctlx.num = num;
    ioctlx.dom = dom;
    ioctlx.addr = (unsigned long)addr;
    ioctlx.arr = arr;
    ioctlx.err = err;

    rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);

    /* Command was recognized, some gfn in arr are in paging state */
    if ( rc < 0 && errno == ENOENT )
    {
        do {
            usleep(100);
            rc = retry_paged(fd, dom, addr, arr, err, num);
        } while ( rc > 0 );
    }
    /* Command was not recognized, use fall back */
    else if ( rc < 0 && errno == EINVAL && (int)num > 0 )
    {
        /*
         * IOCTL_PRIVCMD_MMAPBATCH_V2 is not supported - fall back to
         * IOCTL_PRIVCMD_MMAPBATCH.
         */
        rc = map_foreign_batch_fallback(fmem, dom, addr, num, arr, err);
    }

    if ( rc < 0 )
    {
        /* Undo the reservation without losing the original error. */
        int saved_errno = errno;

        (void)munmap(addr, num << XC_PAGE_SHIFT);
        errno = saved_errno;
        return NULL;
    }

    return addr;
}
281
/*
 * Unmap @num pages (of 1 << XC_PAGE_SHIFT bytes each) starting at @addr.
 *
 * @fmem is not consulted.  Returns the result of munmap().
 */
int osdep_xenforeignmemory_unmap(xenforeignmemory_handle *fmem,
                                 void *addr, size_t num)
{
    size_t nr_bytes = num << XC_PAGE_SHIFT;

    return munmap(addr, nr_bytes);
}
287
/*
 * Issue IOCTL_PRIVCMD_RESTRICT on the handle's descriptor, passing a
 * pointer to @domid.
 *
 * Returns the result of the ioctl.
 */
int osdep_xenforeignmemory_restrict(xenforeignmemory_handle *fmem,
                                    domid_t domid)
{
    int ret = ioctl(fmem->fd, IOCTL_PRIVCMD_RESTRICT, &domid);

    return ret;
}
293
/*
 * Unmap the range described by @fres (fres->addr, fres->nr_frames pages).
 *
 * A NULL @fres is accepted and reported as success.  Otherwise the result
 * of munmap() is returned.  @fmem is not consulted.
 */
int osdep_xenforeignmemory_unmap_resource(
    xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres)
{
    if ( !fres )
        return 0;

    return munmap(fres->addr, fres->nr_frames << XC_PAGE_SHIFT);
}
299
/*
 * Map the resource described by @fres, or query its size.
 *
 * When both fres->addr and fres->nr_frames are zero, no mapping is created:
 * only IOCTL_PRIVCMD_MMAP_RESOURCE is issued and the frame count it reports
 * is copied back into fres->nr_frames.  Otherwise an address range is
 * mmap()ed from the privcmd descriptor first and handed to the ioctl.
 *
 * Returns 0 on success and -1 on failure.  If the ioctl fails with the
 * errno recorded in fmem->unimpl_errno, errno is reported as EOPNOTSUPP.
 * A range mapped here is unmapped again when the ioctl fails.
 */
int osdep_xenforeignmemory_map_resource(
    xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres)
{
    privcmd_mmap_resource_t mr = {
        .dom = fres->domid,
        .type = fres->type,
        .id = fres->id,
        .idx = fres->frame,
        .num = fres->nr_frames,
    };

    /* Anything other than a pure size request needs an address range. */
    if ( fres->addr || fres->nr_frames )
    {
        fres->addr = mmap(fres->addr, fres->nr_frames << XC_PAGE_SHIFT,
                          fres->prot, fres->flags | MAP_SHARED, fmem->fd, 0);
        if ( fres->addr == MAP_FAILED )
            return -1;

        mr.addr = (uintptr_t)fres->addr;
    }

    if ( ioctl(fmem->fd, IOCTL_PRIVCMD_MMAP_RESOURCE, &mr) == 0 )
    {
        /* If requesting size, copy back. */
        if ( !fres->addr )
            fres->nr_frames = mr.num;

        return 0;
    }

    /* Translate this kernel's "unimplemented ioctl" errno. */
    if ( errno == fmem->unimpl_errno )
        errno = EOPNOTSUPP;

    if ( fres->addr )
    {
        /* Drop the mapping made above while keeping the ioctl's errno. */
        int saved_errno = errno;

        osdep_xenforeignmemory_unmap_resource(fmem, fres);
        errno = saved_errno;
    }

    return -1;
}
348
349 /*
350 * Local variables:
351 * mode: C
352 * c-file-style: "BSD"
353 * c-basic-offset: 4
354 * tab-width: 4
355 * indent-tabs-mode: nil
356 * End:
357 */
358