1 /*
2  * This library is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU Lesser General Public
4  * License as published by the Free Software Foundation;
5  * version 2.1 of the License.
6  *
7  * This library is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
10  * Lesser General Public License for more details.
11  *
12  * You should have received a copy of the GNU Lesser General Public
13  * License along with this library; If not, see <http://www.gnu.org/licenses/>.
14  *
15  * Split out from xc_linux_osdep.c:
16  *
17  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
18  */
19 
20 #include <alloca.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <unistd.h>
24 #include <string.h>
25 
26 #include <sys/mman.h>
27 #include <sys/ioctl.h>
28 #include <xen-tools/common-macros.h>
29 
30 #include "private.h"
31 
32 #ifndef O_CLOEXEC
33 #define O_CLOEXEC 0
34 #endif
35 
/*
 * Open the privcmd device and store the descriptor in the handle.
 *
 * /dev/xen/privcmd is tried first; /proc/xen/privcmd is used only when the
 * first open fails with ENOENT, ENXIO or ENODEV.  The descriptor is then
 * probed with IOCTL_PRIVCMD_UNIMPLEMENTED and the errno that probe fails
 * with is saved in fmem->unimpl_errno.
 *
 * Returns 0 on success (fmem->fd and fmem->unimpl_errno are set, errno is
 * cleared) or -1 on failure.
 */
int osdep_xenforeignmemory_open(xenforeignmemory_handle *fmem)
{
    /* The /dev node is the newer interface, so it gets the first attempt. */
    int privcmd = open("/dev/xen/privcmd", O_RDWR|O_CLOEXEC);

    if ( privcmd == -1 )
    {
        switch ( errno )
        {
        case ENOENT:
        case ENXIO:
        case ENODEV:
            /* The /dev node is not usable: fall back to the /proc one. */
            privcmd = open("/proc/xen/privcmd", O_RDWR|O_CLOEXEC);
            break;

        default:
            break;
        }
    }

    if ( privcmd == -1 )
    {
        PERROR("Could not obtain handle on privileged command interface");
        return -1;
    }

    /*
     * Unimplemented ioctls are conventionally rejected with ENOTTY, but
     * older privcmd versions use EINVAL instead.  Issue an ioctl that must
     * fail and remember which errno this driver reports.
     */
    if ( ioctl(privcmd, IOCTL_PRIVCMD_UNIMPLEMENTED, NULL) >= 0 )
    {
        xtl_log(fmem->logger, XTL_ERROR, -1, "xenforeignmemory",
                "privcmd ioctl should not be implemented");
        close(privcmd);
        return -1;
    }

    fmem->unimpl_errno = errno;
    errno = 0;
    fmem->fd = privcmd;

    return 0;
}
76 
/*
 * Close the privcmd descriptor held by the handle.
 *
 * A handle whose fd is -1 has nothing to close and yields 0; otherwise the
 * result of close() is passed back unchanged.
 */
int osdep_xenforeignmemory_close(xenforeignmemory_handle *fmem)
{
    return ( fmem->fd == -1 ) ? 0 : close(fmem->fd);
}
84 
/*
 * Issue IOCTL_PRIVCMD_MMAPBATCH for the single frame *mfn at addr and keep
 * repeating it for as long as it fails with ENOENT.
 *
 * Before every attempt PRIVCMD_MMAPBATCH_PAGED_ERROR is toggled in *mfn and
 * the caller is put to sleep for 100us.  Returns the result of the last
 * ioctl.
 */
static int map_foreign_batch_single(int fd, uint32_t dom,
                                    xen_pfn_t *mfn, unsigned long addr)
{
    privcmd_mmapbatch_t req;
    int rc;

    req.arr = mfn;
    req.addr = addr;
    req.dom = dom;
    req.num = 1;

    for ( ; ; )
    {
        *mfn ^= PRIVCMD_MMAPBATCH_PAGED_ERROR;
        usleep(100);

        rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &req);

        /* Only an ENOENT failure is worth another round. */
        if ( rc >= 0 || errno != ENOENT )
            break;
    }

    return rc;
}
106 
107 /*
108  * Retry mmap of all paged gfns in batches
109  * retuns < 0 on fatal error
110  * returns 0 if all gfns left paging state
111  * returns > 0 if some gfns are still in paging state
112  *
113  * Walk all gfns and try to assemble blocks of gfns in paging state.
114  * This will keep the request ring full and avoids delays.
115  */
retry_paged(int fd,uint32_t dom,void * addr,const xen_pfn_t * arr,int * err,size_t num)116 static int retry_paged(int fd, uint32_t dom, void *addr,
117                        const xen_pfn_t *arr, int *err, size_t num)
118 {
119     privcmd_mmapbatch_v2_t ioctlx;
120     int rc, paged = 0;
121     size_t i = 0;
122 
123     do
124     {
125         /* Skip gfns not in paging state */
126         if ( err[i] != -ENOENT )
127         {
128             i++;
129             continue;
130         }
131 
132         paged++;
133 
134         /* At least one gfn is still in paging state */
135         ioctlx.num = 1;
136         ioctlx.dom = dom;
137         ioctlx.addr = (unsigned long)addr + (i<<XC_PAGE_SHIFT);
138         ioctlx.arr = arr + i;
139         ioctlx.err = err + i;
140 
141         /* Assemble a batch of requests */
142         while ( ++i < num )
143         {
144             if ( err[i] != -ENOENT )
145                 break;
146             ioctlx.num++;
147         }
148 
149         /* Send request and abort on fatal error */
150         rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);
151         if ( rc < 0 && errno != ENOENT )
152             goto out;
153 
154     } while ( i < num );
155 
156     rc = paged;
157 out:
158     return rc;
159 }
160 
/*
 * Map num frames of domain dom, listed in arr[], into this process.
 *
 * A region of num << XC_PAGE_SHIFT bytes is mmap()ed on the privcmd
 * descriptor (addr is passed as the placement hint and MAP_SHARED is always
 * added to flags) and then populated with IOCTL_PRIVCMD_MMAPBATCH_V2, which
 * is handed err[] for per-frame status.  Two failures of that ioctl are
 * recovered from:
 *
 *  - ENOENT: some frames are in paging state.  retry_paged() is called
 *    every 100us until it reports nothing left to retry or a fatal error.
 *  - EINVAL with (int)num > 0: V2 is treated as unsupported and the older
 *    IOCTL_PRIVCMD_MMAPBATCH is used on a private, writable copy of arr[].
 *    Per-frame results are derived here by comparing that copy with arr[].
 *
 * Returns the mapped address, or NULL with errno set.  On failure the
 * region is unmapped again.
 */
void *osdep_xenforeignmemory_map(xenforeignmemory_handle *fmem,
                                 uint32_t dom, void *addr,
                                 int prot, int flags, size_t num,
                                 const xen_pfn_t arr[/*num*/], int err[/*num*/])
{
    int fd = fmem->fd;
    privcmd_mmapbatch_v2_t ioctlx;
    size_t i;
    int rc;

    addr = mmap(addr, num << XC_PAGE_SHIFT, prot, flags | MAP_SHARED,
                fd, 0);
    if ( addr == MAP_FAILED )
        return NULL;

    ioctlx.num = num;
    ioctlx.dom = dom;
    ioctlx.addr = (unsigned long)addr;
    ioctlx.arr = arr;
    ioctlx.err = err;

    rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);

    /* Command was recognized, some gfn in arr are in paging state */
    if ( rc < 0 && errno == ENOENT )
    {
        do {
            usleep(100);
            rc = retry_paged(fd, dom, addr, arr, err, num);
        } while ( rc > 0 );
    }
    /* Command was not recognized, use fall back */
    else if ( rc < 0 && errno == EINVAL && (int)num > 0 )
    {
        /*
         * IOCTL_PRIVCMD_MMAPBATCH_V2 is not supported - fall back to
         * IOCTL_PRIVCMD_MMAPBATCH.
         */
        privcmd_mmapbatch_t ioctlx_v1;
        xen_pfn_t *pfn;
        /*
         * Keep the byte count in a size_t: with an unsigned int it would be
         * truncated once num * sizeof(*pfn) no longer fits in 32 bits, and
         * the copy below would then overrun an undersized buffer.
         */
        size_t pfn_arr_size = ROUNDUP((num * sizeof(*pfn)), XC_PAGE_SHIFT);
        long os_page_size = sysconf(_SC_PAGESIZE);
        /*
         * Small arrays live on the stack, larger ones in an anonymous
         * mapping.  The choice is made once so that allocation and release
         * cannot disagree; a failed sysconf() selects the mapping.
         */
        int pfn_on_stack = os_page_size > 0 &&
                           pfn_arr_size <= (size_t)os_page_size;

        if ( pfn_on_stack )
            pfn = alloca(num * sizeof(*pfn));
        else
        {
            pfn = mmap(NULL, pfn_arr_size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANON | MAP_POPULATE, -1, 0);
            if ( pfn == MAP_FAILED )
            {
                /* Report the mmap() failure, not whatever cleanup leaves. */
                int saved_errno = errno;

                PERROR("mmap of pfn array failed");
                (void)munmap(addr, num << XC_PAGE_SHIFT);
                errno = saved_errno;
                return NULL;
            }
        }

        /* The V1 ioctl reports errors by modifying the array it is given. */
        memcpy(pfn, arr, num * sizeof(*arr));

        ioctlx_v1.num = num;
        ioctlx_v1.dom = dom;
        ioctlx_v1.addr = (unsigned long)addr;
        ioctlx_v1.arr = pfn;

        rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx_v1);

        rc = rc < 0 ? -errno : 0;

        for ( i = 0; i < num; ++i )
        {
            switch ( pfn[i] ^ arr[i] )
            {
            case 0:
                /* Entry untouched: only a non-paging global error applies. */
                err[i] = rc != -ENOENT ? rc : 0;
                continue;
            default:
                /* Entry altered in some way other than the paged marker. */
                err[i] = -EINVAL;
                continue;
            case PRIVCMD_MMAPBATCH_PAGED_ERROR:
                if ( rc != -ENOENT )
                {
                    err[i] = rc ?: -EINVAL;
                    continue;
                }
                /* Frame is paged out: retry it alone until it resolves. */
                rc = map_foreign_batch_single(fd, dom, pfn + i,
                        (unsigned long)addr + (i<<XC_PAGE_SHIFT));
                if ( rc < 0 )
                {
                    rc = -errno;
                    break;
                }
                /*
                 * Record the outcome of the retry, judged the same way as
                 * the other entries, so that err[i] is never left holding
                 * whatever the caller's array contained beforehand.
                 */
                err[i] = pfn[i] == arr[i] ? 0 : -EINVAL;
                rc = -ENOENT;
                continue;
            }
            /* Only reached by the break above: a retry failed fatally. */
            break;
        }

        if ( !pfn_on_stack )
            munmap(pfn, pfn_arr_size);

        /* -ENOENT with every entry visited means all retries went through. */
        if ( rc == -ENOENT && i == num )
            rc = 0;
        else if ( rc )
        {
            errno = -rc;
            rc = -1;
        }
    }

    if ( rc < 0 )
    {
        int saved_errno = errno;

        (void)munmap(addr, num << XC_PAGE_SHIFT);
        errno = saved_errno;
        return NULL;
    }

    return addr;
}
281 
/*
 * Unmap a region of num pages starting at addr.
 *
 * The handle is not used.  Returns the result of munmap().
 */
int osdep_xenforeignmemory_unmap(xenforeignmemory_handle *fmem,
                                 void *addr, size_t num)
{
    const size_t nr_bytes = num << XC_PAGE_SHIFT;

    return munmap(addr, nr_bytes);
}
287 
/*
 * Issue IOCTL_PRIVCMD_RESTRICT on the handle's descriptor, passing a
 * pointer to domid.  Returns the result of the ioctl.
 */
int osdep_xenforeignmemory_restrict(xenforeignmemory_handle *fmem,
                                    domid_t domid)
{
    const int rc = ioctl(fmem->fd, IOCTL_PRIVCMD_RESTRICT, &domid);

    return rc;
}
293 
/*
 * Unmap the nr_frames pages at fres->addr.
 *
 * A NULL fres is accepted and yields 0; otherwise the result of munmap()
 * is returned.  The handle is not used.
 */
int osdep_xenforeignmemory_unmap_resource(
    xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres)
{
    if ( !fres )
        return 0;

    return munmap(fres->addr, fres->nr_frames << XC_PAGE_SHIFT);
}
299 
/*
 * Map the resource described by fres, or query its size.
 *
 * When fres->addr is NULL and fres->nr_frames is 0 no memory is mapped:
 * IOCTL_PRIVCMD_MMAP_RESOURCE is issued on its own and the frame count it
 * reports is written to fres->nr_frames.  Otherwise nr_frames pages are
 * mmap()ed on the privcmd descriptor first (MAP_SHARED is always added to
 * fres->flags) and the ioctl is pointed at that mapping.
 *
 * Returns 0 on success, -1 with errno set on failure.  An ioctl failure
 * with the errno recorded in fmem->unimpl_errno is reported as EOPNOTSUPP,
 * and any non-NULL fres->addr is unmapped again before returning.
 */
int osdep_xenforeignmemory_map_resource(
    xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres)
{
    privcmd_mmap_resource_t mr = {
        .dom = fres->domid,
        .type = fres->type,
        .id = fres->id,
        .idx = fres->frame,
        .num = fres->nr_frames,
    };

    /* Anything but a pure size request needs backing address space. */
    if ( fres->addr || fres->nr_frames )
    {
        fres->addr = mmap(fres->addr, fres->nr_frames << XC_PAGE_SHIFT,
                          fres->prot, fres->flags | MAP_SHARED, fmem->fd, 0);
        if ( fres->addr == MAP_FAILED )
            return -1;

        mr.addr = (uintptr_t)fres->addr;
    }

    if ( ioctl(fmem->fd, IOCTL_PRIVCMD_MMAP_RESOURCE, &mr) )
    {
        if ( errno == fmem->unimpl_errno )
            errno = EOPNOTSUPP;

        if ( fres->addr )
        {
            /* Keep the ioctl's errno across the cleanup. */
            const int saved_errno = errno;

            osdep_xenforeignmemory_unmap_resource(fmem, fres);
            errno = saved_errno;
        }

        return -1;
    }

    /* A size request has no mapping; hand the reported count back. */
    if ( !fres->addr )
        fres->nr_frames = mr.num;

    return 0;
}
348 
349 /*
350  * Local variables:
351  * mode: C
352  * c-file-style: "BSD"
353  * c-basic-offset: 4
354  * tab-width: 4
355  * indent-tabs-mode: nil
356  * End:
357  */
358