// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include "vm/vm_object_paged.h"

#include "vm_priv.h"

#include <arch/ops.h>
#include <assert.h>
#include <err.h>
#include <fbl/alloc_checker.h>
#include <fbl/auto_call.h>
#include <inttypes.h>
#include <ktl/move.h>
#include <lib/console.h>
#include <stdlib.h>
#include <string.h>
#include <trace.h>
#include <vm/fault.h>
#include <vm/physmap.h>
#include <vm/vm.h>
#include <vm/vm_address_region.h>
#include <zircon/types.h>

#define LOCAL_TRACE MAX(VM_GLOBAL_TRACE, 0)

namespace {

void ZeroPage(paddr_t pa) {
    void* ptr = paddr_to_physmap(pa);
    DEBUG_ASSERT(ptr);

    arch_zero_page(ptr);
}

void ZeroPage(vm_page_t* p) {
    paddr_t pa = p->paddr();
    ZeroPage(pa);
}

void InitializeVmPage(vm_page_t* p) {
    DEBUG_ASSERT(p->state == VM_PAGE_STATE_ALLOC);
    p->state = VM_PAGE_STATE_OBJECT;
    p->object.pin_count = 0;
}

// round up the size to the next page size boundary and make sure we don't wrap
zx_status_t RoundSize(uint64_t size, uint64_t* out_size) {
    *out_size = ROUNDUP_PAGE_SIZE(size);
    if (*out_size < size) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    // there's a max size to keep indexes within range
    if (*out_size > VmObjectPaged::MAX_SIZE) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    return ZX_OK;
}

} // namespace

VmObjectPaged::VmObjectPaged(
    uint32_t options, uint32_t pmm_alloc_flags, uint64_t size,
    fbl::RefPtr<VmObject> parent, fbl::RefPtr<PageSource> page_source)
    : VmObject(ktl::move(parent)),
      options_(options),
      size_(size),
      pmm_alloc_flags_(pmm_alloc_flags),
      page_source_(ktl::move(page_source)) {
    LTRACEF("%p\n", this);

    DEBUG_ASSERT(IS_PAGE_ALIGNED(size_));
    DEBUG_ASSERT(page_source_ == nullptr || parent_ == nullptr);
}

VmObjectPaged::~VmObjectPaged() {
    canary_.Assert();

    LTRACEF("%p\n", this);

    page_list_.ForEveryPage(
        [this](const auto p, uint64_t off) {
            if (this->is_contiguous()) {
                p->object.pin_count--;
            }
            ASSERT(p->object.pin_count == 0);
            return ZX_ERR_NEXT;
        });

    // free all of the pages attached to us
    page_list_.FreeAllPages();

    if (page_source_) {
        page_source_->Close();
    }
}

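// Creates a new anonymous paged VMO of |size| bytes (rounded up to a whole
// number of pages). No pages are committed up front; they are populated on
// demand by GetPageLocked() or explicitly via CommitRange(). Callers that want
// a physically contiguous VMO must use CreateContiguous() instead.
//
// Illustrative usage (a sketch, not taken from a caller in this file):
//
//   fbl::RefPtr<VmObject> vmo;
//   zx_status_t status = VmObjectPaged::Create(PMM_ALLOC_FLAG_ANY, 0u, 4 * PAGE_SIZE, &vmo);
//   if (status == ZX_OK) {
//       // map it, Read()/Write() it, or CommitRange() as needed
//   }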
zx_status_t VmObjectPaged::Create(uint32_t pmm_alloc_flags,
                                  uint32_t options,
                                  uint64_t size, fbl::RefPtr<VmObject>* obj) {
    // make sure size is page aligned
    zx_status_t status = RoundSize(size, &size);
    if (status != ZX_OK) {
        return status;
    }

    if (options & kContiguous) {
        // Force callers to use CreateContiguous() instead.
        return ZX_ERR_INVALID_ARGS;
    }

    fbl::AllocChecker ac;
    auto vmo = fbl::AdoptRef<VmObject>(
        new (&ac) VmObjectPaged(options, pmm_alloc_flags, size, nullptr, nullptr));
    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }

    *obj = ktl::move(vmo);

    return ZX_OK;
}

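// Creates a VMO backed by a single physically contiguous run of pages aligned
// to 2^|alignment_log2| bytes. All pages are allocated, zeroed, and pinned up
// front, so the call fails with ZX_ERR_NO_MEMORY if a suitable contiguous run
// cannot be found.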
zx_status_t VmObjectPaged::CreateContiguous(uint32_t pmm_alloc_flags, uint64_t size,
                                            uint8_t alignment_log2, fbl::RefPtr<VmObject>* obj) {
    DEBUG_ASSERT(alignment_log2 < sizeof(uint64_t) * 8);
    // make sure size is page aligned
    zx_status_t status = RoundSize(size, &size);
    if (status != ZX_OK) {
        return status;
    }

    fbl::AllocChecker ac;
    auto vmo = fbl::AdoptRef<VmObject>(
        new (&ac) VmObjectPaged(kContiguous, pmm_alloc_flags, size, nullptr, nullptr));
    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }

    if (size == 0) {
        *obj = ktl::move(vmo);
        return ZX_OK;
    }

    // allocate the pages
    list_node page_list;
    list_initialize(&page_list);

    size_t num_pages = size / PAGE_SIZE;
    paddr_t pa;
    status = pmm_alloc_contiguous(num_pages, pmm_alloc_flags, alignment_log2, &pa, &page_list);
    if (status != ZX_OK) {
        LTRACEF("failed to allocate enough pages (asked for %zu)\n", num_pages);
        return ZX_ERR_NO_MEMORY;
    }
    auto cleanup_phys_pages = fbl::MakeAutoCall([&page_list]() {
        pmm_free(&page_list);
    });

    // add them to the appropriate range of the object
    VmObjectPaged* vmop = static_cast<VmObjectPaged*>(vmo.get());
    for (uint64_t off = 0; off < size; off += PAGE_SIZE) {
        vm_page_t* p = list_remove_head_type(&page_list, vm_page_t, queue_node);
        ASSERT(p);

        InitializeVmPage(p);

        // TODO: remove once pmm returns zeroed pages
        ZeroPage(p);

        // We don't need thread-safety analysis here, since this VMO has not
        // been shared anywhere yet.
        [&]() TA_NO_THREAD_SAFETY_ANALYSIS {
            status = vmop->page_list_.AddPage(p, off);
        }();
        if (status != ZX_OK) {
            return status;
        }

        // Mark the pages as pinned, so they can't be physically rearranged
        // underneath us.
        p->object.pin_count++;
    }

    cleanup_phys_pages.cancel();
    *obj = ktl::move(vmo);
    return ZX_OK;
}

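// Wraps an existing page-aligned region of read-only kernel data in a VMO
// without copying it. The backing pages are looked up through the physmap and
// must either already be wired to the kernel or be free (in which case they
// are claimed from the pmm and wired here).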
zx_status_t VmObjectPaged::CreateFromROData(const void* data, size_t size, fbl::RefPtr<VmObject>* obj) {
    LTRACEF("data %p, size %zu\n", data, size);

    fbl::RefPtr<VmObject> vmo;
    zx_status_t status = Create(PMM_ALLOC_FLAG_ANY, 0, size, &vmo);
    if (status != ZX_OK) {
        return status;
    }

    if (size > 0) {
        ASSERT(IS_PAGE_ALIGNED(size));
        ASSERT(IS_PAGE_ALIGNED(reinterpret_cast<uintptr_t>(data)));

        // Do a direct lookup of the physical pages backing the range of
        // the kernel that these addresses belong to and jam them directly
        // into the VMO.
        //
        // NOTE: This relies on the kernel not otherwise owning the pages.
        // If the setup of the kernel's address space changes so that the
        // pages are attached to a kernel VMO, this will need to change.

        paddr_t start_paddr = vaddr_to_paddr(data);
        ASSERT(start_paddr != 0);

        for (size_t count = 0; count < size / PAGE_SIZE; count++) {
            paddr_t pa = start_paddr + count * PAGE_SIZE;
            vm_page_t* page = paddr_to_vm_page(pa);
            ASSERT(page);

            if (page->state == VM_PAGE_STATE_WIRED) {
                // it's wired to the kernel, so we can just use it directly
            } else if (page->state == VM_PAGE_STATE_FREE) {
                list_node list = LIST_INITIAL_VALUE(list);
                ASSERT(pmm_alloc_range(pa, 1, &list) == ZX_OK);
                page->state = VM_PAGE_STATE_WIRED;
            } else {
                panic("page used to back static vmo in unusable state: paddr %#" PRIxPTR " state %u\n", pa,
                      page->state);
            }

            // XXX hack to work around the ref pointer to the base class
            auto vmo2 = static_cast<VmObjectPaged*>(vmo.get());
            vmo2->AddPage(page, count * PAGE_SIZE);
        }
    }

    *obj = ktl::move(vmo);

    return ZX_OK;
}

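// Creates a resizable VMO associated with the external PageSource |src|.
// The page source is closed when the VMO is destroyed.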
zx_status_t VmObjectPaged::CreateExternal(fbl::RefPtr<PageSource> src,
                                          uint64_t size, fbl::RefPtr<VmObject>* obj) {
    // make sure size is page aligned
    zx_status_t status = RoundSize(size, &size);
    if (status != ZX_OK) {
        return status;
    }

    fbl::AllocChecker ac;
    auto vmo = fbl::AdoptRef<VmObject>(new (&ac) VmObjectPaged(
            kResizable, PMM_ALLOC_FLAG_ANY, size, nullptr, ktl::move(src)));
    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }

    *obj = ktl::move(vmo);

    return ZX_OK;
}

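// Creates a copy-on-write clone of this VMO covering |size| bytes starting at
// |offset|. The clone initially shares its parent's pages; a private copy of a
// page is made the first time the clone takes a write fault on it (see
// GetPageLocked). Fails if this VMO has a non-cached mapping policy.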
zx_status_t VmObjectPaged::CloneCOW(bool resizable, uint64_t offset, uint64_t size,
                                    bool copy_name, fbl::RefPtr<VmObject>* clone_vmo) {
    LTRACEF("vmo %p offset %#" PRIx64 " size %#" PRIx64 "\n", this, offset, size);

    canary_.Assert();

    // make sure size is page aligned
    zx_status_t status = RoundSize(size, &size);
    if (status != ZX_OK) {
        return status;
    }

    auto options = resizable ? kResizable : 0u;

    // allocate the clone up front outside of our lock
    fbl::AllocChecker ac;
    auto vmo = fbl::AdoptRef<VmObjectPaged>(
        new (&ac) VmObjectPaged(options, pmm_alloc_flags_, size, fbl::WrapRefPtr(this), nullptr));
    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // add the new VMO as a child before we do anything, since its
    // dtor expects to find it in its parent's child list
    AddChildLocked(vmo.get());

    // check that we're not uncached in some way
    if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
        return ZX_ERR_BAD_STATE;
    }

    // set the offset with the parent
    status = vmo->SetParentOffsetLocked(offset);
    if (status != ZX_OK) {
        return status;
    }

    if (copy_name) {
        vmo->name_ = name_;
    }

    *clone_vmo = ktl::move(vmo);

    return ZX_OK;
}

void VmObjectPaged::Dump(uint depth, bool verbose) {
    canary_.Assert();

    // This can grab our lock.
    uint64_t parent_id = parent_user_id();

    Guard<fbl::Mutex> guard{&lock_};

    size_t count = 0;
    page_list_.ForEveryPage([&count](const auto p, uint64_t) {
        count++;
        return ZX_ERR_NEXT;
    });

    for (uint i = 0; i < depth; ++i) {
        printf("  ");
    }
    printf("vmo %p/k%" PRIu64 " size %#" PRIx64
           " pages %zu ref %d parent k%" PRIu64 "\n",
           this, user_id_, size_, count, ref_count_debug(), parent_id);

    if (verbose) {
        auto f = [depth](const auto p, uint64_t offset) {
            for (uint i = 0; i < depth + 1; ++i) {
                printf("  ");
            }
            printf("offset %#" PRIx64 " page %p paddr %#" PRIxPTR "\n", offset, p, p->paddr());
            return ZX_ERR_NEXT;
        };
        page_list_.ForEveryPage(f);
    }
}

size_t VmObjectPaged::AllocatedPagesInRange(uint64_t offset, uint64_t len) const {
    canary_.Assert();
    Guard<fbl::Mutex> guard{&lock_};
    uint64_t new_len;
    if (!TrimRange(offset, len, size_, &new_len)) {
        return 0;
    }
    size_t count = 0;
    // TODO: Figure out what to do with our parent's pages. If we're a clone,
    // page_list_ only contains pages that we've made copies of.
    page_list_.ForEveryPage(
        [&count, offset, new_len](const auto p, uint64_t off) {
            if (off >= offset && off < offset + new_len) {
                count++;
            }
            return ZX_ERR_NEXT;
        });
    return count;
}

zx_status_t VmObjectPaged::AddPage(vm_page_t* p, uint64_t offset) {
    Guard<fbl::Mutex> guard{&lock_};

    return AddPageLocked(p, offset);
}

zx_status_t VmObjectPaged::AddPageLocked(vm_page_t* p, uint64_t offset) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());

    LTRACEF("vmo %p, offset %#" PRIx64 ", page %p (%#" PRIxPTR ")\n", this, offset, p, p->paddr());

    DEBUG_ASSERT(p);

    if (offset >= size_) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    zx_status_t err = page_list_.AddPage(p, offset);
    if (err != ZX_OK) {
        return err;
    }

    // other mappings may have covered this offset into the vmo, so unmap those ranges
    RangeChangeUpdateLocked(offset, PAGE_SIZE);

    return ZX_OK;
}

// Looks up the page at the requested offset, faulting it in if requested and necessary.  If
// this VMO has a parent and the requested page isn't found, the parent will be searched.
//
// |free_list|, if not NULL, is a list of allocated but unused vm_page_t that
// this function may allocate from.  This function will need at most one entry,
// and will not fail if |free_list| is a non-empty list, faulting in was requested,
// and offset is in range.
zx_status_t VmObjectPaged::GetPageLocked(uint64_t offset, uint pf_flags, list_node* free_list,
                                         vm_page_t** const page_out, paddr_t* const pa_out) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());

    if (offset >= size_) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    vm_page_t* p;
    paddr_t pa;

    // see if we already have a page at that offset
    p = page_list_.GetPage(offset);
    if (p) {
        if (page_out) {
            *page_out = p;
        }
        if (pa_out) {
            *pa_out = p->paddr();
        }
        return ZX_OK;
    }

    __UNUSED char pf_string[5];
    LTRACEF("vmo %p, offset %#" PRIx64 ", pf_flags %#x (%s)\n", this, offset, pf_flags,
            vmm_pf_flags_to_string(pf_flags, pf_string));

    // if we have a parent see if they have a page for us
    if (parent_) {
        uint64_t parent_offset;
        bool overflowed = add_overflow(parent_offset_, offset, &parent_offset);
        ASSERT(!overflowed);

        // make sure we don't cause the parent to fault in new pages, just ask for any that already exist
        uint parent_pf_flags = pf_flags & ~(VMM_PF_FLAG_FAULT_MASK);

        zx_status_t status = parent_->GetPageLocked(parent_offset, parent_pf_flags,
                                                    nullptr, &p, &pa);
        if (status == ZX_OK) {
            // we have a page from them. if we're read-only faulting, return that page so they can map
            // or read from it directly
            if ((pf_flags & VMM_PF_FLAG_WRITE) == 0) {
                if (page_out) {
                    *page_out = p;
                }
                if (pa_out) {
                    *pa_out = pa;
                }

                LTRACEF("read only faulting in page %p, pa %#" PRIxPTR " from parent\n", p, pa);

                return ZX_OK;
            }

            // if we're write faulting, we need to clone it and return the new page
            paddr_t pa_clone;
            vm_page_t* p_clone = nullptr;
            if (free_list) {
                p_clone = list_remove_head_type(free_list, vm_page, queue_node);
                if (p_clone) {
                    pa_clone = p_clone->paddr();
                }
            }
            if (!p_clone) {
                status = pmm_alloc_page(pmm_alloc_flags_, &p_clone, &pa_clone);
            }
            if (!p_clone) {
                return ZX_ERR_NO_MEMORY;
            }

            InitializeVmPage(p_clone);

            // do a direct copy of the two pages
            const void* src = paddr_to_physmap(pa);
            void* dst = paddr_to_physmap(pa_clone);

            DEBUG_ASSERT(src && dst);

            memcpy(dst, src, PAGE_SIZE);

            // add the new page and return it
            status = AddPageLocked(p_clone, offset);
            DEBUG_ASSERT(status == ZX_OK);

            LTRACEF("copy-on-write faulted in page %p, pa %#" PRIxPTR " copied from %p, pa %#" PRIxPTR "\n",
                    p_clone, pa_clone, p, pa);

            if (page_out) {
                *page_out = p_clone;
            }
            if (pa_out) {
                *pa_out = pa_clone;
            }

            return ZX_OK;
        }
    }

    // if we're not being asked to sw or hw fault in the page, return not found
    if ((pf_flags & VMM_PF_FLAG_FAULT_MASK) == 0) {
        return ZX_ERR_NOT_FOUND;
    }

    // if we're read faulting, we don't already have a page, and the parent doesn't have it,
    // return the single global zero page
    if ((pf_flags & VMM_PF_FLAG_WRITE) == 0) {
        LTRACEF("returning the zero page\n");
        if (page_out) {
            *page_out = vm_get_zero_page();
        }
        if (pa_out) {
            *pa_out = vm_get_zero_page_paddr();
        }
        return ZX_OK;
    }

    // allocate a page
    if (free_list) {
        p = list_remove_head_type(free_list, vm_page, queue_node);
        if (p) {
            pa = p->paddr();
        }
    }
    if (!p) {
        pmm_alloc_page(pmm_alloc_flags_, &p, &pa);
    }
    if (!p) {
        return ZX_ERR_NO_MEMORY;
    }

    InitializeVmPage(p);

    // TODO: remove once pmm returns zeroed pages
    ZeroPage(pa);

// if ARM and not fully cached, clean/invalidate the page after zeroing it
#if ARCH_ARM64
    if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
        arch_clean_invalidate_cache_range((addr_t)paddr_to_physmap(pa), PAGE_SIZE);
    }
#endif

    zx_status_t status = AddPageLocked(p, offset);
    DEBUG_ASSERT(status == ZX_OK);

    // other mappings may have covered this offset into the vmo, so unmap those ranges
    RangeChangeUpdateLocked(offset, PAGE_SIZE);

    LTRACEF("faulted in page %p, pa %#" PRIxPTR "\n", p, pa);

    if (page_out) {
        *page_out = p;
    }
    if (pa_out) {
        *pa_out = pa;
    }

    return ZX_OK;
}

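// Commits pages for the page-aligned range covering [offset, offset + len):
// one pass counts the gaps in the page list, that many pages are allocated
// from the pmm in bulk, and a second pass fills each missing slot through
// GetPageLocked(), which also handles copying a parent's page for clones.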
zx_status_t VmObjectPaged::CommitRange(uint64_t offset, uint64_t len) {
    canary_.Assert();
    LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);

    Guard<fbl::Mutex> guard{&lock_};

    // trim the size
    uint64_t new_len;
    if (!TrimRange(offset, len, size_, &new_len)) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    // was in range, just zero length
    if (new_len == 0) {
        return ZX_OK;
    }

    // compute a page aligned end to do our searches in to make sure we cover all the pages
    uint64_t end = ROUNDUP_PAGE_SIZE(offset + new_len);
    DEBUG_ASSERT(end > offset);
    offset = ROUNDDOWN(offset, PAGE_SIZE);

    // make a pass through the list, counting the number of pages we need to allocate
    size_t count = 0;
    uint64_t expected_next_off = offset;
    page_list_.ForEveryPageInRange(
        [&count, &expected_next_off](const auto p, uint64_t off) {

            count += (off - expected_next_off) / PAGE_SIZE;
            expected_next_off = off + PAGE_SIZE;
            return ZX_ERR_NEXT;
        },
        expected_next_off, end);

    // If expected_next_off isn't at the end of the range, there was a gap at
    // the end.  Add it back in
    DEBUG_ASSERT(end >= expected_next_off);
    count += (end - expected_next_off) / PAGE_SIZE;
    if (count == 0) {
        return ZX_OK;
    }

    // allocate count number of pages
    list_node page_list;
    list_initialize(&page_list);

    zx_status_t status = pmm_alloc_pages(count, pmm_alloc_flags_, &page_list);
    if (status != ZX_OK) {
        return status;
    }

    // unmap all of the pages in this range on all the mapping regions
    RangeChangeUpdateLocked(offset, end - offset);

    // add them to the appropriate range of the object
    for (uint64_t o = offset; o < end; o += PAGE_SIZE) {
        // Don't commit if we already have this page
        vm_page_t* p = page_list_.GetPage(o);
        if (p) {
            continue;
        }

        // Check if our parent has the page
        paddr_t pa;
        const uint flags = VMM_PF_FLAG_SW_FAULT | VMM_PF_FLAG_WRITE;
        // Should not be able to fail, since we're providing it memory and the
        // range should be valid.
        zx_status_t status = GetPageLocked(o, flags, &page_list, &p, &pa);
        ASSERT(status == ZX_OK);
    }

    DEBUG_ASSERT(list_is_empty(&page_list));

    return ZX_OK;
}

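// Releases the committed pages backing the page-aligned range covering
// [offset, offset + len), unmapping them from all mappings first. Not
// supported for contiguous VMOs, and fails if any page in the range is pinned.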
zx_status_t VmObjectPaged::DecommitRange(uint64_t offset, uint64_t len) {
    canary_.Assert();
    LTRACEF("offset %#" PRIx64 ", len %#" PRIx64 "\n", offset, len);

    if (options_ & kContiguous) {
        return ZX_ERR_NOT_SUPPORTED;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // trim the size
    uint64_t new_len;
    if (!TrimRange(offset, len, size_, &new_len)) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    // was in range, just zero length
    if (new_len == 0) {
        return ZX_OK;
    }

    // figure the starting and ending page offset
    uint64_t start = ROUNDDOWN(offset, PAGE_SIZE);
    uint64_t end = ROUNDUP_PAGE_SIZE(offset + new_len);
    DEBUG_ASSERT(end > offset);
    DEBUG_ASSERT(end > start);
    uint64_t page_aligned_len = end - start;

    LTRACEF("start offset %#" PRIx64 ", end %#" PRIx64 ", page_aligned_len %#" PRIx64 "\n", start, end,
            page_aligned_len);

    // TODO(teisenbe): Allow decommitting of pages pinned by
    // CommitRangeContiguous

    if (AnyPagesPinnedLocked(start, page_aligned_len)) {
        return ZX_ERR_BAD_STATE;
    }

    // unmap all of the pages in this range on all the mapping regions
    RangeChangeUpdateLocked(start, page_aligned_len);

    page_list_.FreePages(start, end);

    return ZX_OK;
}

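// Pins every page in the page-aligned range covering [offset, offset + len) so
// it cannot be decommitted out from under the caller. Every page in the range
// must already be committed; otherwise the call fails and any pins taken so
// far are rolled back.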
zx_status_t VmObjectPaged::Pin(uint64_t offset, uint64_t len) {
    canary_.Assert();

    Guard<fbl::Mutex> guard{&lock_};
    return PinLocked(offset, len);
}

zx_status_t VmObjectPaged::PinLocked(uint64_t offset, uint64_t len) {
    canary_.Assert();

    // verify that the range is within the object
    if (unlikely(!InRange(offset, len, size_))) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    if (unlikely(len == 0)) {
        return ZX_OK;
    }

    const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
    const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);

    uint64_t expected_next_off = start_page_offset;
    zx_status_t status = page_list_.ForEveryPageInRange(
        [&expected_next_off](const auto p, uint64_t off) {
            if (off != expected_next_off) {
                return ZX_ERR_NOT_FOUND;
            }

            DEBUG_ASSERT(p->state == VM_PAGE_STATE_OBJECT);
            if (p->object.pin_count == VM_PAGE_OBJECT_MAX_PIN_COUNT) {
                return ZX_ERR_UNAVAILABLE;
            }

            p->object.pin_count++;
            expected_next_off = off + PAGE_SIZE;
            return ZX_ERR_NEXT;
        },
        start_page_offset, end_page_offset);

    if (status == ZX_OK && expected_next_off != end_page_offset) {
        status = ZX_ERR_NOT_FOUND;
    }
    if (status != ZX_OK) {
        UnpinLocked(start_page_offset, expected_next_off - start_page_offset);
        return status;
    }

    return ZX_OK;
}

void VmObjectPaged::Unpin(uint64_t offset, uint64_t len) {
    Guard<fbl::Mutex> guard{&lock_};
    UnpinLocked(offset, len);
}

void VmObjectPaged::UnpinLocked(uint64_t offset, uint64_t len) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());

    // verify that the range is within the object
    ASSERT(InRange(offset, len, size_));

    if (unlikely(len == 0)) {
        return;
    }

    const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
    const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);

    uint64_t expected_next_off = start_page_offset;
    zx_status_t status = page_list_.ForEveryPageInRange(
        [&expected_next_off](const auto p, uint64_t off) {
            if (off != expected_next_off) {
                return ZX_ERR_NOT_FOUND;
            }

            DEBUG_ASSERT(p->state == VM_PAGE_STATE_OBJECT);
            ASSERT(p->object.pin_count > 0);
            p->object.pin_count--;
            expected_next_off = off + PAGE_SIZE;
            return ZX_ERR_NEXT;
        },
        start_page_offset, end_page_offset);
    ASSERT_MSG(status == ZX_OK && expected_next_off == end_page_offset,
               "Tried to unpin an uncommitted page");
    return;
}

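// Returns true if any page in the page-aligned range [offset, offset + len)
// has a non-zero pin count.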
bool VmObjectPaged::AnyPagesPinnedLocked(uint64_t offset, size_t len) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());
    DEBUG_ASSERT(IS_PAGE_ALIGNED(offset));
    DEBUG_ASSERT(IS_PAGE_ALIGNED(len));

    const uint64_t start_page_offset = offset;
    const uint64_t end_page_offset = offset + len;

    bool found_pinned = false;
    page_list_.ForEveryPageInRange(
        [&found_pinned, start_page_offset, end_page_offset](const auto p, uint64_t off) {
            DEBUG_ASSERT(off >= start_page_offset && off < end_page_offset);
            if (p->object.pin_count > 0) {
                found_pinned = true;
                return ZX_ERR_STOP;
            }
            return ZX_ERR_NEXT;
        },
        start_page_offset, end_page_offset);

    return found_pinned;
}

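// Grows or shrinks the VMO to |s| bytes (rounded up to a page multiple). Only
// VMOs created with kResizable may be resized. Shrinking unmaps and frees the
// trimmed pages and fails if any of them are pinned.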
zx_status_t VmObjectPaged::ResizeLocked(uint64_t s) {
    canary_.Assert();
    DEBUG_ASSERT(lock_.lock().IsHeld());

    LTRACEF("vmo %p, size %" PRIu64 "\n", this, s);

    if (!(options_ & kResizable)) {
        return ZX_ERR_UNAVAILABLE;
    }

    // round up the size to the next page size boundary and make sure we don't wrap
    zx_status_t status = RoundSize(s, &s);
    if (status != ZX_OK) {
        return status;
    }

    // make sure everything is aligned before we get started
    DEBUG_ASSERT(IS_PAGE_ALIGNED(size_));
    DEBUG_ASSERT(IS_PAGE_ALIGNED(s));

    // see if we're shrinking or expanding the vmo
    if (s < size_) {
        // shrinking
        uint64_t start = s;
        uint64_t end = size_;
        uint64_t len = end - start;

        // bail if there are any pinned pages in the range we're trimming
        if (AnyPagesPinnedLocked(start, len)) {
            return ZX_ERR_BAD_STATE;
        }

        // unmap all of the pages in this range on all the mapping regions
        RangeChangeUpdateLocked(start, len);

        page_list_.FreePages(start, end);
    } else if (s > size_) {
        // expanding
        // figure the starting and ending page offset that is affected
        uint64_t start = size_;
        uint64_t end = s;
        uint64_t len = end - start;

        // inform all our children and mappings that there are new bits
        RangeChangeUpdateLocked(start, len);
    }

    // save bytewise size
    size_ = s;

    return ZX_OK;
}

zx_status_t VmObjectPaged::Resize(uint64_t s) {
    Guard<fbl::Mutex> guard{&lock_};

    return ResizeLocked(s);
}

zx_status_t VmObjectPaged::SetParentOffsetLocked(uint64_t offset) {
    DEBUG_ASSERT(lock_.lock().IsHeld());

    // offset must be page aligned
    if (!IS_PAGE_ALIGNED(offset)) {
        return ZX_ERR_INVALID_ARGS;
    }

    // TODO: ZX-692 make sure that the accumulated offset of the entire parent chain doesn't wrap 64bit space

    // make sure the size + this offset are still valid
    uint64_t end;
    if (add_overflow(offset, size_, &end)) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    parent_offset_ = offset;

    return ZX_OK;
}

// perform some sort of copy in/out on a range of the object using a passed in lambda
// for the copy routine
template <typename T>
zx_status_t VmObjectPaged::ReadWriteInternal(uint64_t offset, size_t len, bool write, T copyfunc) {
    canary_.Assert();

    Guard<fbl::Mutex> guard{&lock_};

    // are we uncached? abort in this case
    if (cache_policy_ != ARCH_MMU_FLAG_CACHED) {
        return ZX_ERR_BAD_STATE;
    }

    // test if in range
    uint64_t end_offset;
    if (add_overflow(offset, len, &end_offset) || end_offset > size_) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    // walk the list of pages and do the write
    uint64_t src_offset = offset;
    size_t dest_offset = 0;
    while (len > 0) {
        size_t page_offset = src_offset % PAGE_SIZE;
        size_t tocopy = MIN(PAGE_SIZE - page_offset, len);

        // fault in the page
        paddr_t pa;
        auto status = GetPageLocked(src_offset,
                                    VMM_PF_FLAG_SW_FAULT | (write ? VMM_PF_FLAG_WRITE : 0),
                                    nullptr, nullptr, &pa);
        if (status != ZX_OK) {
            return status;
        }

        // compute the kernel mapping of this page
        uint8_t* page_ptr = reinterpret_cast<uint8_t*>(paddr_to_physmap(pa));

        // call the copy routine
        auto err = copyfunc(page_ptr + page_offset, dest_offset, tocopy);
        if (err < 0) {
            return err;
        }

        src_offset += tocopy;
        dest_offset += tocopy;
        len -= tocopy;
    }

    return ZX_OK;
}

zx_status_t VmObjectPaged::Read(void* _ptr, uint64_t offset, size_t len) {
    canary_.Assert();
    // test to make sure this is a kernel pointer
    if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) {
        DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n");
        return ZX_ERR_INVALID_ARGS;
    }

    // read routine that just uses a memcpy
    uint8_t* ptr = reinterpret_cast<uint8_t*>(_ptr);
    auto read_routine = [ptr](const void* src, size_t offset, size_t len) -> zx_status_t {
        memcpy(ptr + offset, src, len);
        return ZX_OK;
    };

    return ReadWriteInternal(offset, len, false, read_routine);
}

zx_status_t VmObjectPaged::Write(const void* _ptr, uint64_t offset, size_t len) {
    canary_.Assert();
    // test to make sure this is a kernel pointer
    if (!is_kernel_address(reinterpret_cast<vaddr_t>(_ptr))) {
        DEBUG_ASSERT_MSG(0, "non kernel pointer passed\n");
        return ZX_ERR_INVALID_ARGS;
    }

    // write routine that just uses a memcpy
    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(_ptr);
    auto write_routine = [ptr](void* dst, size_t offset, size_t len) -> zx_status_t {
        memcpy(dst, ptr + offset, len);
        return ZX_OK;
    };

    return ReadWriteInternal(offset, len, true, write_routine);
}

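// Invokes |lookup_fn| with the physical address of each page in the
// page-aligned range covering [offset, offset + len). Pages missing from this
// VMO's own list are resolved through GetPageLocked() without faulting, so the
// call fails if any page in the range is committed neither here nor in an
// ancestor.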
zx_status_t VmObjectPaged::Lookup(uint64_t offset, uint64_t len,
                                  vmo_lookup_fn_t lookup_fn, void* context) {
    canary_.Assert();
    if (unlikely(len == 0)) {
        return ZX_ERR_INVALID_ARGS;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // verify that the range is within the object
    if (unlikely(!InRange(offset, len, size_))) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    const uint64_t start_page_offset = ROUNDDOWN(offset, PAGE_SIZE);
    const uint64_t end_page_offset = ROUNDUP(offset + len, PAGE_SIZE);

    uint64_t expected_next_off = start_page_offset;
    zx_status_t status = page_list_.ForEveryPageInRange(
        [&expected_next_off, this, lookup_fn, context,
         start_page_offset](const auto p, uint64_t off) {

            // If some page was missing from our list, run the more expensive
            // GetPageLocked to see if our parent has it.
            for (uint64_t missing_off = expected_next_off; missing_off < off;
                 missing_off += PAGE_SIZE) {

                paddr_t pa;
                zx_status_t status = this->GetPageLocked(missing_off, 0, nullptr,
                                                         nullptr, &pa);
                if (status != ZX_OK) {
                    return ZX_ERR_NO_MEMORY;
                }
                const size_t index = (missing_off - start_page_offset) / PAGE_SIZE;
                status = lookup_fn(context, missing_off, index, pa);
                if (status != ZX_OK) {
                    if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) {
                        status = ZX_ERR_INTERNAL;
                    }
                    return status;
                }
            }

            const size_t index = (off - start_page_offset) / PAGE_SIZE;
            paddr_t pa = p->paddr();
            zx_status_t status = lookup_fn(context, off, index, pa);
            if (status != ZX_OK) {
                if (unlikely(status == ZX_ERR_NEXT || status == ZX_ERR_STOP)) {
                    status = ZX_ERR_INTERNAL;
                }
                return status;
            }

            expected_next_off = off + PAGE_SIZE;
            return ZX_ERR_NEXT;
        },
        start_page_offset, end_page_offset);
    if (status != ZX_OK) {
        return status;
    }

    // If expected_next_off isn't at the end, there's a gap to process
    for (uint64_t off = expected_next_off; off < end_page_offset; off += PAGE_SIZE) {
        paddr_t pa;
        zx_status_t status = GetPageLocked(off, 0, nullptr, nullptr, &pa);
        if (status != ZX_OK) {
            return ZX_ERR_NO_MEMORY;
        }
        const size_t index = (off - start_page_offset) / PAGE_SIZE;
        status = lookup_fn(context, off, index, pa);
        if (status != ZX_OK) {
            return status;
        }
    }

    return ZX_OK;
}

zx_status_t VmObjectPaged::ReadUser(user_out_ptr<void> ptr, uint64_t offset, size_t len) {
    canary_.Assert();

    // read routine that uses copy_to_user
    auto read_routine = [ptr](const void* src, size_t offset, size_t len) -> zx_status_t {
        return ptr.byte_offset(offset).copy_array_to_user(src, len);
    };

    return ReadWriteInternal(offset, len, false, read_routine);
}

zx_status_t VmObjectPaged::WriteUser(user_in_ptr<const void> ptr, uint64_t offset, size_t len) {
    canary_.Assert();

    // write routine that uses copy_from_user
    auto write_routine = [ptr](void* dst, size_t offset, size_t len) -> zx_status_t {
        return ptr.byte_offset(offset).copy_array_from_user(dst, len);
    };

    return ReadWriteInternal(offset, len, true, write_routine);
}

zx_status_t VmObjectPaged::InvalidateCache(const uint64_t offset, const uint64_t len) {
    return CacheOp(offset, len, CacheOpType::Invalidate);
}

zx_status_t VmObjectPaged::CleanCache(const uint64_t offset, const uint64_t len) {
    return CacheOp(offset, len, CacheOpType::Clean);
}

zx_status_t VmObjectPaged::CleanInvalidateCache(const uint64_t offset, const uint64_t len) {
    return CacheOp(offset, len, CacheOpType::CleanInvalidate);
}

zx_status_t VmObjectPaged::SyncCache(const uint64_t offset, const uint64_t len) {
    return CacheOp(offset, len, CacheOpType::Sync);
}

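// Walks the range [start_offset, start_offset + len) one page at a time and
// applies the requested arch cache maintenance operation to each page that can
// be looked up; offsets whose pages cannot be looked up (e.g. uncommitted
// pages) are skipped.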
zx_status_t VmObjectPaged::CacheOp(const uint64_t start_offset, const uint64_t len,
                                   const CacheOpType type) {
    canary_.Assert();

    if (unlikely(len == 0)) {
        return ZX_ERR_INVALID_ARGS;
    }

    Guard<fbl::Mutex> guard{&lock_};

    if (unlikely(!InRange(start_offset, len, size_))) {
        return ZX_ERR_OUT_OF_RANGE;
    }

    const size_t end_offset = static_cast<size_t>(start_offset + len);
    size_t op_start_offset = static_cast<size_t>(start_offset);

    while (op_start_offset != end_offset) {
        // Offset at the end of the current page.
        const size_t page_end_offset = ROUNDUP(op_start_offset + 1, PAGE_SIZE);

        // This cache op will either terminate at the end of the current page or
        // at the end of the whole op range -- whichever comes first.
        const size_t op_end_offset = MIN(page_end_offset, end_offset);

        const size_t cache_op_len = op_end_offset - op_start_offset;

        const size_t page_offset = op_start_offset % PAGE_SIZE;

        // lookup the physical address of the page, careful not to fault in a new one
        paddr_t pa;
        auto status = GetPageLocked(op_start_offset, 0, nullptr, nullptr, &pa);

        if (likely(status == ZX_OK)) {
            // Convert the page address to a Kernel virtual address.
            const void* ptr = paddr_to_physmap(pa);
            const addr_t cache_op_addr = reinterpret_cast<addr_t>(ptr) + page_offset;

            LTRACEF("ptr %p op %d\n", ptr, (int)type);

            // Perform the necessary cache op against this page.
            switch (type) {
            case CacheOpType::Invalidate:
                arch_invalidate_cache_range(cache_op_addr, cache_op_len);
                break;
            case CacheOpType::Clean:
                arch_clean_cache_range(cache_op_addr, cache_op_len);
                break;
            case CacheOpType::CleanInvalidate:
                arch_clean_invalidate_cache_range(cache_op_addr, cache_op_len);
                break;
            case CacheOpType::Sync:
                arch_sync_cache_range(cache_op_addr, cache_op_len);
                break;
            }
        }

        op_start_offset += cache_op_len;
    }

    return ZX_OK;
}

uint32_t VmObjectPaged::GetMappingCachePolicy() const {
    Guard<fbl::Mutex> guard{&lock_};

    return cache_policy_;
}

zx_status_t VmObjectPaged::SetMappingCachePolicy(const uint32_t cache_policy) {
    // Is it a valid cache flag?
    if (cache_policy & ~ZX_CACHE_POLICY_MASK) {
        return ZX_ERR_INVALID_ARGS;
    }

    Guard<fbl::Mutex> guard{&lock_};

    // conditions for allowing the cache policy to be set:
    // 1) vmo has no pages committed currently
    // 2) vmo has no mappings
    // 3) vmo has no clones
    // 4) vmo is not a clone
    if (!page_list_.IsEmpty()) {
        return ZX_ERR_BAD_STATE;
    }
    if (!mapping_list_.is_empty()) {
        return ZX_ERR_BAD_STATE;
    }
    if (!children_list_.is_empty()) {
        return ZX_ERR_BAD_STATE;
    }
    if (parent_) {
        return ZX_ERR_BAD_STATE;
    }

    cache_policy_ = cache_policy;

    return ZX_OK;
}

void VmObjectPaged::RangeChangeUpdateFromParentLocked(const uint64_t offset, const uint64_t len) {
    canary_.Assert();

    LTRACEF("offset %#" PRIx64 " len %#" PRIx64 " p_offset %#" PRIx64 " size_ %#" PRIx64 "\n",
            offset, len, parent_offset_, size_);

    // our parent is notifying that a range of theirs changed, see where it intersects
    // with our offset into the parent and pass it on
    uint64_t offset_new;
    uint64_t len_new;
    if (!GetIntersect(parent_offset_, size_, offset, len,
                      &offset_new, &len_new)) {
        return;
    }

    // if they intersect with us, then by definition the new offset must be >= parent_offset_
    DEBUG_ASSERT(offset_new >= parent_offset_);

    // subtract our offset
    offset_new -= parent_offset_;

    // verify that it's still within range of us
    DEBUG_ASSERT(offset_new + len_new <= size_);

    LTRACEF("new offset %#" PRIx64 " new len %#" PRIx64 "\n",
            offset_new, len_new);

    // pass it on
    // TODO: optimize by not passing on ranges that are completely covered by pages local to this vmo
    RangeChangeUpdateLocked(offset_new, len_new);
}