// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <inttypes.h>
#include <stdint.h>

#include <bitmap/raw-bitmap.h>
#include <bitmap/rle-bitmap.h>
#include <blobfs/allocator.h>
#include <blobfs/common.h>
#include <blobfs/extent-reserver.h>
#include <blobfs/format.h>
#include <blobfs/iterator/extent-iterator.h>
#include <blobfs/node-reserver.h>
#include <fbl/algorithm.h>
#include <fbl/vector.h>
#include <fs/trace.h>
#include <lib/fzl/resizeable-vmo-mapper.h>
#include <lib/zx/vmo.h>
#include <zircon/types.h>

namespace blobfs {

Allocator::Allocator(SpaceManager* space_manager, RawBitmap block_map,
                     fzl::ResizeableVmoMapper node_map)
    : space_manager_(space_manager), block_map_(std::move(block_map)),
      node_map_(std::move(node_map)) {}

Allocator::~Allocator() {
    if (block_map_vmoid_ != VMOID_INVALID) {
        space_manager_->DetachVmo(block_map_vmoid_);
    }
    if (node_map_vmoid_ != VMOID_INVALID) {
        space_manager_->DetachVmo(node_map_vmoid_);
    }
}

Inode* Allocator::GetNode(uint32_t node_index) {
    ZX_DEBUG_ASSERT(node_index < node_map_.size() / kBlobfsInodeSize);
    return &reinterpret_cast<Inode*>(node_map_.start())[node_index];
}

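// Returns true iff every block in [start_block, end_block) is marked allocated in the
// block bitmap. When the check fails and |out_first_unset| is provided, reports the
// first unallocated block encountered.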
bool Allocator::CheckBlocksAllocated(uint64_t start_block, uint64_t end_block,
                                     uint64_t* out_first_unset) const {
    uint64_t unset_bit;
    bool allocated = block_map_.Get(start_block, end_block, &unset_bit);
    if (!allocated && out_first_unset != nullptr) {
        *out_first_unset = unset_bit;
    }
    return allocated;
}

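// Resizes the block bitmap to track |data_block_count| blocks from the superblock.
// The bitmap is first reset to the on-disk storage granularity
// (BlockMapBlocks() * kBlobfsBlockBits) and then shrunk to the exact block count.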
zx_status_t Allocator::ResetBlockMapSize() {
    ZX_DEBUG_ASSERT(ReservedBlockCount() == 0);
    const auto info = space_manager_->Info();
    uint64_t new_size = info.data_block_count;
    if (new_size != block_map_.size()) {
        uint64_t rounded_size = BlockMapBlocks(info) * kBlobfsBlockBits;
        zx_status_t status = block_map_.Reset(rounded_size);
        if (status != ZX_OK) {
            return status;
        }

        if (new_size < rounded_size) {
            // If the requested block count is not a multiple of the bitmap's
            // block granularity, shrink down to the actual block count.
            status = block_map_.Shrink(new_size);
            if (status != ZX_OK) {
                return status;
            }
        }
    }
    return ZX_OK;
}

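// Resizes the node map VMO to hold exactly |inode_count| inodes. The resulting size
// must be block-aligned; otherwise the superblock is considered invalid and
// ZX_ERR_BAD_STATE is returned.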
zx_status_t Allocator::ResetNodeMapSize() {
    ZX_DEBUG_ASSERT(ReservedNodeCount() == 0);
    const auto info = space_manager_->Info();
    uint64_t nodemap_size = kBlobfsInodeSize * info.inode_count;
    if (fbl::round_up(nodemap_size, kBlobfsBlockSize) != nodemap_size) {
        return ZX_ERR_BAD_STATE;
    }
    ZX_DEBUG_ASSERT(nodemap_size / kBlobfsBlockSize == NodeMapBlocks(info));

    if (nodemap_size > node_map_.size()) {
        zx_status_t status = node_map_.Grow(nodemap_size);
        if (status != ZX_OK) {
            return status;
        }
    } else if (nodemap_size < node_map_.size()) {
        zx_status_t status = node_map_.Shrink(nodemap_size);
        if (status != ZX_OK) {
            return status;
        }
    }
    return ZX_OK;
}

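// Repopulates both the block bitmap and the node map from persistent storage by
// enqueueing reads for each map into |txn| and executing the transaction. The backing
// VMOs are lazily attached to the block device on first use.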
zx_status_t Allocator::ResetFromStorage(fs::ReadTxn txn) {
    ZX_DEBUG_ASSERT(ReservedBlockCount() == 0);
    ZX_DEBUG_ASSERT(ReservedNodeCount() == 0);
    zx_status_t status;
    if (block_map_vmoid_ == VMOID_INVALID) {
        status = space_manager_->AttachVmo(block_map_.StorageUnsafe()->GetVmo(),
                                           &block_map_vmoid_);
        if (status != ZX_OK) {
            return status;
        }
    }
    if (node_map_vmoid_ == VMOID_INVALID) {
        status = space_manager_->AttachVmo(node_map_.vmo(), &node_map_vmoid_);
        if (status != ZX_OK) {
            return status;
        }
    }
    const auto info = space_manager_->Info();
    txn.Enqueue(block_map_vmoid_, 0, BlockMapStartBlock(info), BlockMapBlocks(info));
    txn.Enqueue(node_map_vmoid_, 0, NodeMapStartBlock(info), NodeMapBlocks(info));
    return txn.Transact();
}

const zx::vmo& Allocator::GetBlockMapVmo() const {
    return block_map_.StorageUnsafe()->GetVmo();
}

const zx::vmo& Allocator::GetNodeMapVmo() const {
    return node_map_.vmo();
}

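// Reserves |num_blocks| blocks without committing them to the block bitmap. If the
// initial search comes up short, additional block slices are requested from FVM and
// the search resumes near the old end of the bitmap. A minimal caller-side sketch
// (|blob_block_count| is a hypothetical variable, not defined in this file):
//
//   fbl::Vector<ReservedExtent> extents;
//   if (allocator->ReserveBlocks(blob_block_count, &extents) == ZX_OK) {
//       for (const ReservedExtent& extent : extents) {
//           allocator->MarkBlocksAllocated(extent);
//       }
//   }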
zx_status_t Allocator::ReserveBlocks(uint64_t num_blocks,
                                     fbl::Vector<ReservedExtent>* out_extents) {
    zx_status_t status;
    uint64_t actual_blocks;

    // TODO(smklein): If we allocate blocks up to the end of the block map, extend, and continue
    // allocating, we'll create two extents where one would suffice.
    // If we knew how many reserved / allocated blocks existed we could resize ahead-of-time and
    // flatten this case, as an optimization.

    if ((status = FindBlocks(0, num_blocks, out_extents, &actual_blocks)) != ZX_OK) {
        // If we have run out of blocks, attempt to add block slices via FVM.
        // The new 'hint' is the first location we could try to find blocks
        // after merely extending the allocation maps.
        uint64_t hint = block_map_.size() - fbl::min(num_blocks, block_map_.size());

        ZX_DEBUG_ASSERT(actual_blocks < num_blocks);
        num_blocks -= actual_blocks;

        if ((status = space_manager_->AddBlocks(num_blocks, &block_map_)) != ZX_OK ||
            (status = FindBlocks(hint, num_blocks, out_extents, &actual_blocks)) != ZX_OK) {
            LogAllocationFailure(num_blocks);
            out_extents->reset();
            return ZX_ERR_NO_SPACE;
        }
    }
    return ZX_OK;
}

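// Commits a previously reserved extent by setting its block range in the block bitmap.
// The range must still be unallocated when this is called.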
void Allocator::MarkBlocksAllocated(const ReservedExtent& reserved_extent) {
    const Extent& extent = reserved_extent.extent();
    uint64_t start = extent.Start();
    uint64_t length = extent.Length();
    uint64_t end = start + length;

    ZX_DEBUG_ASSERT(CheckBlocksUnallocated(start, end));
    ZX_ASSERT(block_map_.Set(start, end) == ZX_OK);
}

void Allocator::FreeBlocks(const Extent& extent) {
    uint64_t start = extent.Start();
    uint64_t length = extent.Length();
    uint64_t end = start + length;

    ZX_DEBUG_ASSERT(CheckBlocksAllocated(start, end));
    ZX_ASSERT(block_map_.Clear(start, end) == ZX_OK);
}

zx_status_t Allocator::ReserveNodes(uint64_t num_nodes, fbl::Vector<ReservedNode>* out_nodes) {
    for (uint64_t i = 0; i < num_nodes; i++) {
        std::optional<ReservedNode> node = ReserveNode();
        if (!node) {
            out_nodes->reset();
            return ZX_ERR_NO_SPACE;
        }
        out_nodes->push_back(std::move(*node));
    }
    return ZX_OK;
}

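// Reserves a single node without marking it allocated in the node map. If no free node
// is found, more inodes are requested from FVM before retrying. Because the allocator
// always hands out the lowest available node, the free-node lower bound is advanced
// past the reserved index.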
std::optional<ReservedNode> Allocator::ReserveNode() {
    TRACE_DURATION("blobfs", "ReserveNode");
    uint32_t node_index;
    zx_status_t status;
    if ((status = FindNode(&node_index)) != ZX_OK) {
        // If we didn't find any free inodes, try adding more via FVM.
        if ((status = space_manager_->AddInodes(&node_map_)) != ZX_OK ||
            (status = FindNode(&node_index)) != ZX_OK) {
            return std::nullopt;
        }
    }

    ZX_DEBUG_ASSERT(!GetNode(node_index)->header.IsAllocated());
    std::optional<ReservedNode> node(ReservedNode(this, node_index));

    // We don't know where the next free node is, but we do know that the current
    // implementation of the allocator always allocates the lowest available node.
    SetFreeNodeLowerBound(node_index + 1);
    return node;
}

void Allocator::MarkInodeAllocated(const ReservedNode& node) {
    Inode* mapped_inode = GetNode(node.index());
    mapped_inode->header.flags = kBlobFlagAllocated;
    mapped_inode->header.next_node = 0;
}

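// Allocates |node| as an extent container and links it into a blob's node list:
// |previous_node| is updated to point at the container, and the container records
// |previous_node| as its predecessor.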
void Allocator::MarkContainerNodeAllocated(const ReservedNode& node, uint32_t previous_node) {
    const uint32_t index = node.index();
    GetNode(previous_node)->header.next_node = index;
    ExtentContainer* container = GetNode(index)->AsExtentContainer();
    container->header.flags = kBlobFlagAllocated | kBlobFlagExtentContainer;
    container->header.next_node = 0;
    container->previous_node = previous_node;
    container->extent_count = 0;
}

void Allocator::FreeNode(uint32_t node_index) {
    SetFreeNodeLowerBoundIfSmallest(node_index);
    GetNode(node_index)->header.flags = 0;
}

bool Allocator::CheckBlocksUnallocated(uint64_t start_block, uint64_t end_block) const {
    ZX_DEBUG_ASSERT(end_block > start_block);
    uint64_t blkno_out;
    return block_map_.Find(false, start_block, end_block, end_block - start_block,
                           &blkno_out) == ZX_OK;
}

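// Starting at |start|, finds a run of up to |block_length| blocks that are free in the
// block bitmap. Returns true if the search should be restarted from |*out_start|
// (because |start| itself was already allocated); returns false if
// [*out_start, *out_start + *out_block_length) is a valid free range.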
bool Allocator::FindUnallocatedExtent(uint64_t start, uint64_t block_length,
                                      uint64_t* out_start, uint64_t* out_block_length) {
    bool restart = false;
    // Constraint: The contiguous run may not extend beyond the end of the
    // block bitmap.
    block_length = fbl::min(block_length, block_map_.size() - start);
    uint64_t first_already_allocated;
    if (!block_map_.Scan(start, start + block_length, false, &first_already_allocated)) {
        // Part of [start, start + block_length) is already allocated.
        if (first_already_allocated == start) {
            // Jump past as much of the allocated region as possible,
            // and then restart searching for more free blocks.
            uint64_t first_free;
            if (block_map_.Scan(start, start + block_length, true, &first_free)) {
                // All bits are allocated; jump past this entire portion
                // of allocated blocks.
                start += block_length;
            } else {
                // Not all blocks are allocated; jump to the first free block we can find.
                ZX_DEBUG_ASSERT(first_free > start);
                start = first_free;
            }
            // Restart the search in this case: although there was a prefix
            // collision, the suffix of this region may be followed by
            // additional free blocks.
            restart = true;
        } else {
            // Since |start| is free, we'll try allocating from as much of this region
            // as we can until we hit previously-allocated blocks.
            ZX_DEBUG_ASSERT(first_already_allocated > start);
            block_length = first_already_allocated - start;
        }
    }

    *out_start = start;
    *out_block_length = block_length;
    return restart;
}

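// Walks the free range [start, start + block_length) against the in-flight reserved
// block map, emitting any unreserved prefix as a ReservedExtent and skipping past
// reserved regions. Returns true if a collision with a reservation occurred and the
// caller should restart its search from |*out_start|.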
bool Allocator::MunchUnreservedExtents(bitmap::RleBitmap::const_iterator reserved_iterator,
                                       uint64_t remaining_blocks,
                                       uint64_t start,
                                       uint64_t block_length,
                                       fbl::Vector<ReservedExtent>* out_extents,
                                       bitmap::RleBitmap::const_iterator* out_reserved_iterator,
                                       uint64_t* out_remaining_blocks,
                                       uint64_t* out_start,
                                       uint64_t* out_block_length) {
    bool collision = false;

    const uint64_t start_max = start + block_length;

    // There are remaining in-flight reserved blocks we might collide with;
    // verify this allocation is not being held by another write operation.
    while ((start < start_max) &&
           (block_length != 0) &&
           (reserved_iterator != ReservedBlocksCend())) {
        // We should only be considering blocks which are not allocated.
        ZX_DEBUG_ASSERT(start < start + block_length);
        ZX_DEBUG_ASSERT(block_map_.Scan(start, start + block_length, false));

        if (reserved_iterator->end() <= start) {
            // The reserved iterator is lagging behind this region.
            ZX_DEBUG_ASSERT(reserved_iterator != ReservedBlocksCend());
            reserved_iterator++;
        } else if (start + block_length <= reserved_iterator->start()) {
            // The remaining reserved blocks occur after this free region;
            // this allocation doesn't collide.
            break;
        } else {
            // The reserved region ends at/after the start of the allocation.
            // The reserved region starts before the end of the allocation.
            //
            // This implies a collision exists.
            collision = true;
            if (start >= reserved_iterator->start() &&
                start + block_length <= reserved_iterator->end()) {
                // The collision is total; move past the entire reserved region.
                start = reserved_iterator->end();
                block_length = 0;
                break;
            }
            if (start < reserved_iterator->start()) {
                // Free Prefix: Although the observed range overlaps with a
                // reservation, it includes a prefix which is free from
                // overlap.
                //
                // Take as much of the proposed allocation as possible *before* the
                // reservation.
                Extent extent(start, static_cast<BlockCountType>(reserved_iterator->start() -
                                                                 start));
                ZX_DEBUG_ASSERT(block_map_.Scan(extent.Start(),
                                                extent.Start() + extent.Length(),
                                                false));
                ZX_DEBUG_ASSERT(block_length > extent.Length());
                // Jump past the end of this reservation.
                uint64_t reserved_length = reserved_iterator->end() - reserved_iterator->start();
                if (block_length > extent.Length() + reserved_length) {
                    block_length -= extent.Length() + reserved_length;
                } else {
                    block_length = 0;
                }
                start = reserved_iterator->end();
                remaining_blocks -= extent.Length();
                out_extents->push_back(ReservedExtent(this, std::move(extent)));
                reserved_iterator = ReservedBlocksCbegin();
            } else {
                // Free Suffix: The observed range overlaps with a
                // reservation, but not entirely.
                //
                // Jump to the end of the reservation, as free space exists
                // there.
                ZX_DEBUG_ASSERT(start + block_length > reserved_iterator->end());
                block_length = (start + block_length) - reserved_iterator->end();
                start = reserved_iterator->end();
            }
        }
    }

    *out_remaining_blocks = remaining_blocks;
    *out_reserved_iterator = reserved_iterator;
    *out_start = start;
    *out_block_length = block_length;
    return collision;
}

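// Reserves up to |num_blocks| blocks, beginning the search at |start| and appending the
// resulting extents to |out_extents|. On ZX_ERR_NO_SPACE, |*out_actual_blocks| reports
// how many blocks were reserved before the bitmap was exhausted.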
zx_status_t Allocator::FindBlocks(uint64_t start, uint64_t num_blocks,
                                  fbl::Vector<ReservedExtent>* out_extents,
                                  uint64_t* out_actual_blocks) {
    // Using a single iterator over the reserved allocation map lets us
    // avoid re-scanning portions of the reserved map. This is possible
    // because the |reserved_blocks_| map should be immutable
    // for the duration of this method, unless we actually find blocks, at
    // which point the iterator is reset.
    auto reserved_iterator = ReservedBlocksCbegin();

    uint64_t remaining_blocks = num_blocks;
    while (remaining_blocks != 0) {
        // Look for |block_length| contiguous free blocks.
        if (start >= block_map_.size()) {
            *out_actual_blocks = num_blocks - remaining_blocks;
            return ZX_ERR_NO_SPACE;
        }
        // Constraint: No contiguous run longer than the maximum permitted
        // extent.
        uint64_t block_length = fbl::min(remaining_blocks, kBlockCountMax);

        bool restart_search = FindUnallocatedExtent(start, block_length,
                                                    &start, &block_length);
        if (restart_search) {
            continue;
        }

        // [start, start + block_length) is now a valid region of free blocks.
        //
        // Take the subset of this range that doesn't intersect with reserved blocks,
        // and add it to our extent list.
        restart_search = MunchUnreservedExtents(reserved_iterator, remaining_blocks, start,
                                                block_length, out_extents, &reserved_iterator,
                                                &remaining_blocks, &start, &block_length);
        if (restart_search) {
            continue;
        }

        if (block_length != 0) {
            // The remainder of this window exists and collides with neither
            // the reservation map nor the committed blocks.
            Extent extent(start, static_cast<BlockCountType>(block_length));
            ZX_DEBUG_ASSERT(block_map_.Scan(extent.Start(),
                                            extent.Start() + extent.Length(),
                                            false));
            start += extent.Length();
            remaining_blocks -= extent.Length();
            out_extents->push_back(ReservedExtent(this, std::move(extent)));
            reserved_iterator = ReservedBlocksCbegin();
        }
    }

    *out_actual_blocks = num_blocks;
    return ZX_OK;
}

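// Scans the node map for the first node that is neither allocated on disk nor
// currently reserved, starting at the cached free-node lower bound.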
zx_status_t Allocator::FindNode(uint32_t* node_index_out) {
    uint32_t inode_count = static_cast<uint32_t>(space_manager_->Info().inode_count);
    for (uint32_t i = FreeNodeLowerBound(); i < inode_count; ++i) {
        if (!GetNode(i)->header.IsAllocated()) {
            // Found a free node, which is also not reserved.
            if (!IsNodeReserved(i)) {
                *node_index_out = i;
                return ZX_OK;
            }
        }
    }

    // There are no free nodes available. Setting the free node lower bound to
    // |inode_count| lets the next allocation fail fast, and still locates new
    // nodes if more inodes are added later.
    SetFreeNodeLowerBound(inode_count);
    return ZX_ERR_OUT_OF_RANGE;
}

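// Logs a detailed breakdown of space usage when a block reservation fails,
// distinguishing fragmentation (the requested bytes would fit in the free byte count)
// from over-allocation.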
void Allocator::LogAllocationFailure(uint64_t num_blocks) const {
    const Superblock& info = space_manager_->Info();
    const uint64_t requested_bytes = num_blocks * info.block_size;
    const uint64_t total_bytes = info.data_block_count * info.block_size;
    const uint64_t persisted_used_bytes = info.alloc_block_count * info.block_size;
    const uint64_t pending_used_bytes = ReservedBlockCount() * info.block_size;
    const uint64_t used_bytes = persisted_used_bytes + pending_used_bytes;
    ZX_ASSERT_MSG(used_bytes <= total_bytes,
                  "blobfs using more bytes than available: %" PRIu64 " > %" PRIu64 "\n",
                  used_bytes, total_bytes);
    const uint64_t free_bytes = total_bytes - used_bytes;

    if (!log_allocation_failure_) {
        return;
    }

    FS_TRACE_ERROR("Blobfs has run out of space on persistent storage.\n");
    FS_TRACE_ERROR("    Could not allocate %" PRIu64 " bytes\n", requested_bytes);
    FS_TRACE_ERROR("    Total data bytes  : %" PRIu64 "\n", total_bytes);
    FS_TRACE_ERROR("    Used data bytes   : %" PRIu64 "\n", persisted_used_bytes);
    FS_TRACE_ERROR("    Preallocated bytes: %" PRIu64 "\n", pending_used_bytes);
    FS_TRACE_ERROR("    Free data bytes   : %" PRIu64 "\n", free_bytes);
    FS_TRACE_ERROR("    This allocation failure is the result of %s.\n",
                   requested_bytes <= free_bytes ? "fragmentation" : "over-allocation");
}

} // namespace blobfs