1 /*
2  * This library is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU Lesser General Public
4  * License as published by the Free Software Foundation;
5  * version 2.1 of the License.
6  *
7  * This library is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
10  * Lesser General Public License for more details.
11  *
12  * You should have received a copy of the GNU Lesser General Public
13  * License along with this library; If not, see <http://www.gnu.org/licenses/>.
14  *
 * Split out from xc_linux_osdep.c:
16  *
17  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
18  */
19 
20 #include <alloca.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <unistd.h>
24 #include <string.h>
25 
26 #include <sys/mman.h>
27 #include <sys/ioctl.h>
28 
29 #include "private.h"
30 
/* Round _x up to the next multiple of 2^_w; the result is an unsigned long. */
#define ROUNDUP(_x,_w) \
    (((unsigned long)(_x) + ((1UL << (_w)) - 1)) & ~((1UL << (_w)) - 1))
32 
/*
 * If the headers do not provide O_CLOEXEC, define it to 0 so that OR-ing
 * it into the open() flags is a no-op.
 */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC 0
#endif
36 
/*
 * Open the privileged command device and store its descriptor in fmem->fd.
 *
 * /dev/xen/privcmd is tried first; /proc/xen/privcmd is only attempted when
 * the first open fails with ENOENT, ENXIO or ENODEV.
 *
 * Returns 0 on success, or -1 after reporting the failure through PERROR.
 */
int osdep_xenforeignmemory_open(xenforeignmemory_handle *fmem)
{
    /* prefer this newer interface */
    int privcmd_fd = open("/dev/xen/privcmd", O_RDWR|O_CLOEXEC);

    if ( privcmd_fd == -1 )
    {
        switch ( errno )
        {
        case ENOENT:
        case ENXIO:
        case ENODEV:
            /* Fallback to /proc/xen/privcmd */
            privcmd_fd = open("/proc/xen/privcmd", O_RDWR|O_CLOEXEC);
            break;

        default:
            break;
        }
    }

    if ( privcmd_fd != -1 )
    {
        fmem->fd = privcmd_fd;
        return 0;
    }

    PERROR("Could not obtain handle on privileged command interface");
    return -1;
}
59 
/*
 * Close the descriptor held in fmem->fd.
 *
 * A descriptor of -1 is treated as "nothing to close" and yields 0;
 * otherwise the result of close() is returned unchanged.
 */
int osdep_xenforeignmemory_close(xenforeignmemory_handle *fmem)
{
    return ( fmem->fd == -1 ) ? 0 : close(fmem->fd);
}
67 
/*
 * Map a single frame with IOCTL_PRIVCMD_MMAPBATCH, retrying for as long as
 * the ioctl fails with ENOENT.
 *
 * fd   - privcmd file descriptor
 * dom  - domain id placed in the request
 * mfn  - the frame to map; PRIVCMD_MMAPBATCH_PAGED_ERROR is toggled in *mfn
 *        before every attempt, and the ioctl is given this pointer as its
 *        one-element array
 * addr - address passed to the ioctl for the frame
 *
 * Returns the result of the last ioctl: negative only when it failed with
 * an errno other than ENOENT.
 */
static int map_foreign_batch_single(int fd, uint32_t dom,
                                    xen_pfn_t *mfn, unsigned long addr)
{
    privcmd_mmapbatch_t request;
    int result;

    request.arr = mfn;
    request.addr = addr;
    request.dom = dom;
    request.num = 1;

    for ( ; ; )
    {
        /* Flip the paged-error flag in the entry before (re)submitting it */
        *mfn ^= PRIVCMD_MMAPBATCH_PAGED_ERROR;
        usleep(100);
        result = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &request);

        /* Only an ENOENT failure is worth another attempt */
        if ( result >= 0 || errno != ENOENT )
            break;
    }

    return result;
}
89 
90 /*
91  * Retry mmap of all paged gfns in batches
92  * retuns < 0 on fatal error
93  * returns 0 if all gfns left paging state
94  * returns > 0 if some gfns are still in paging state
95  *
96  * Walk all gfns and try to assemble blocks of gfns in paging state.
97  * This will keep the request ring full and avoids delays.
98  */
retry_paged(int fd,uint32_t dom,void * addr,const xen_pfn_t * arr,int * err,size_t num)99 static int retry_paged(int fd, uint32_t dom, void *addr,
100                        const xen_pfn_t *arr, int *err, size_t num)
101 {
102     privcmd_mmapbatch_v2_t ioctlx;
103     int rc, paged = 0;
104     size_t i = 0;
105 
106     do
107     {
108         /* Skip gfns not in paging state */
109         if ( err[i] != -ENOENT )
110         {
111             i++;
112             continue;
113         }
114 
115         paged++;
116 
117         /* At least one gfn is still in paging state */
118         ioctlx.num = 1;
119         ioctlx.dom = dom;
120         ioctlx.addr = (unsigned long)addr + (i<<PAGE_SHIFT);
121         ioctlx.arr = arr + i;
122         ioctlx.err = err + i;
123 
124         /* Assemble a batch of requests */
125         while ( ++i < num )
126         {
127             if ( err[i] != -ENOENT )
128                 break;
129             ioctlx.num++;
130         }
131 
132         /* Send request and abort on fatal error */
133         rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);
134         if ( rc < 0 && errno != ENOENT )
135             goto out;
136 
137     } while ( i < num );
138 
139     rc = paged;
140 out:
141     return rc;
142 }
143 
/*
 * Map num frames of domain dom into the calling process.
 *
 * A region of num << PAGE_SHIFT bytes is mmap()ed on the privcmd descriptor
 * (MAP_SHARED is always OR-ed into flags) and then populated with
 * IOCTL_PRIVCMD_MMAPBATCH_V2:
 *
 *  - If that ioctl fails with ENOENT, retry_paged() is called, with a 100us
 *    sleep before each call, until it returns 0 or a negative value.
 *  - If it fails with EINVAL, IOCTL_PRIVCMD_MMAPBATCH is used instead.  The
 *    per-frame outcome of that ioctl is read back by comparing the array it
 *    was given against arr, so it is run on a private copy of arr and the
 *    differences are translated into err[].
 *
 * fmem  - handle whose fd refers to the privcmd device
 * dom   - domain owning the frames
 * addr  - address hint passed to mmap()
 * prot  - protection passed to mmap()
 * flags - extra mmap() flags
 * num   - number of entries in arr and err
 * arr   - frames to map
 * err   - per-frame status, written by the ioctl or by the fallback path
 *
 * Returns the mapped address, or NULL with errno set.  On every failure
 * after the initial mmap() the region is unmapped again before returning.
 */
void *osdep_xenforeignmemory_map(xenforeignmemory_handle *fmem,
                                 uint32_t dom, void *addr,
                                 int prot, int flags, size_t num,
                                 const xen_pfn_t arr[/*num*/], int err[/*num*/])
{
    int fd = fmem->fd;
    privcmd_mmapbatch_v2_t ioctlx;
    size_t i;
    int rc;

    addr = mmap(addr, num << PAGE_SHIFT, prot, flags | MAP_SHARED,
                fd, 0);
    if ( addr == MAP_FAILED )
    {
        /* Hand mmap()'s errno to the caller whatever PERROR does to it */
        int saved_errno = errno;

        PERROR("mmap failed");
        errno = saved_errno;
        return NULL;
    }

    ioctlx.num = num;
    ioctlx.dom = dom;
    ioctlx.addr = (unsigned long)addr;
    ioctlx.arr = arr;
    ioctlx.err = err;

    rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);

    /* Command was recognized, some gfn in arr are in paging state */
    if ( rc < 0 && errno == ENOENT )
    {
        do {
            usleep(100);
            rc = retry_paged(fd, dom, addr, arr, err, num);
        } while ( rc > 0 );
    }
    /* Command was not recognized, use fall back */
    else if ( rc < 0 && errno == EINVAL && (int)num > 0 )
    {
        /*
         * IOCTL_PRIVCMD_MMAPBATCH_V2 is not supported - fall back to
         * IOCTL_PRIVCMD_MMAPBATCH.
         */
        privcmd_mmapbatch_t ioctlx_v1;
        xen_pfn_t *pfn;
        /*
         * size_t, not unsigned int: num * sizeof(*pfn) can exceed 32 bits,
         * and a truncated size would select the wrong allocation below.
         */
        size_t pfn_arr_size = ROUNDUP((num * sizeof(*pfn)), PAGE_SHIFT);

        /* Small copies live on the stack, larger ones in an anonymous map */
        if ( pfn_arr_size <= PAGE_SIZE )
            pfn = alloca(num * sizeof(*pfn));
        else
        {
            pfn = mmap(NULL, pfn_arr_size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANON | MAP_POPULATE, -1, 0);
            if ( pfn == MAP_FAILED )
            {
                /* Report the mmap() failure, not PERROR's or munmap()'s */
                int saved_errno = errno;

                PERROR("mmap of pfn array failed");
                (void)munmap(addr, num << PAGE_SHIFT);
                errno = saved_errno;
                return NULL;
            }
        }

        memcpy(pfn, arr, num * sizeof(*arr));

        ioctlx_v1.num = num;
        ioctlx_v1.dom = dom;
        ioctlx_v1.addr = (unsigned long)addr;
        ioctlx_v1.arr = pfn;

        rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx_v1);

        /* From here on rc holds 0 or a negative errno value */
        rc = rc < 0 ? -errno : 0;

        for ( i = 0; i < num; ++i )
        {
            /* Bits in which the entry differs from what was requested */
            xen_pfn_t changed = pfn[i] ^ arr[i];

            if ( changed == 0 )
            {
                /* Entry untouched: it only inherits a non-ENOENT failure */
                err[i] = rc != -ENOENT ? rc : 0;
            }
            else if ( changed != PRIVCMD_MMAPBATCH_PAGED_ERROR )
            {
                err[i] = -EINVAL;
            }
            else if ( rc != -ENOENT )
            {
                err[i] = rc ? rc : -EINVAL;
            }
            else
            {
                /* Entry flagged as paged: retry this frame on its own */
                rc = map_foreign_batch_single(fd, dom, pfn + i,
                        (unsigned long)addr + (i<<PAGE_SHIFT));
                if ( rc < 0 )
                {
                    rc = -errno;
                    break;
                }

                /*
                 * Record the outcome of the retry so that err[i] is never
                 * left unwritten: an entry that still differs from the
                 * request is classified like any other modified entry.
                 */
                err[i] = ( pfn[i] == arr[i] ) ? 0 : -EINVAL;

                /* Keep treating later paged entries as retryable */
                rc = -ENOENT;
            }
        }

        if ( pfn_arr_size > PAGE_SIZE )
            munmap(pfn, pfn_arr_size);

        /* -ENOENT with every entry processed means all retries went through */
        if ( rc == -ENOENT && i == num )
            rc = 0;
        else if ( rc )
        {
            errno = -rc;
            rc = -1;
        }
    }

    if ( rc < 0 )
    {
        int saved_errno = errno;

        PERROR("ioctl failed");
        (void)munmap(addr, num << PAGE_SHIFT);
        errno = saved_errno;
        return NULL;
    }

    return addr;
}
267 
/*
 * Unmap a region of num pages starting at addr.
 *
 * fmem is not used here.  Returns the result of munmap().
 */
int osdep_xenforeignmemory_unmap(xenforeignmemory_handle *fmem,
                                 void *addr, size_t num)
{
    /* num counts pages; munmap() takes the length in bytes */
    const size_t length = num << PAGE_SHIFT;

    return munmap(addr, length);
}
273 
/*
 * Issue IOCTL_PRIVCMD_RESTRICT on the handle's descriptor, passing a
 * pointer to domid as the argument.
 *
 * Returns the result of ioctl().
 */
int osdep_xenforeignmemory_restrict(xenforeignmemory_handle *fmem,
                                    domid_t domid)
{
    int rc;

    rc = ioctl(fmem->fd, IOCTL_PRIVCMD_RESTRICT, &domid);

    return rc;
}
279 
280 /*
281  * Local variables:
282  * mode: C
283  * c-file-style: "BSD"
284  * c-basic-offset: 4
285  * tab-width: 4
286  * indent-tabs-mode: nil
287  * End:
288  */
289