// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <inttypes.h>
#include <stdint.h>

#include <optional>
#include <utility>

#include <bitmap/raw-bitmap.h>
#include <bitmap/rle-bitmap.h>
#include <blobfs/allocator.h>
#include <blobfs/common.h>
#include <blobfs/extent-reserver.h>
#include <blobfs/format.h>
#include <blobfs/iterator/extent-iterator.h>
#include <blobfs/node-reserver.h>
#include <fbl/algorithm.h>
#include <fbl/vector.h>
#include <fs/trace.h>
#include <lib/fzl/resizeable-vmo-mapper.h>
#include <lib/zx/vmo.h>
#include <zircon/types.h>

namespace blobfs {

Allocator::Allocator(SpaceManager* space_manager, RawBitmap block_map,
                     fzl::ResizeableVmoMapper node_map)
    : space_manager_(space_manager), block_map_(std::move(block_map)),
      node_map_(std::move(node_map)) {}

Allocator::~Allocator() {
    if (block_map_vmoid_ != VMOID_INVALID) {
        space_manager_->DetachVmo(block_map_vmoid_);
    }
    if (node_map_vmoid_ != VMOID_INVALID) {
        space_manager_->DetachVmo(node_map_vmoid_);
    }
}

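// The node map is a flat array of |kBlobfsInodeSize|-byte inodes backed by a
// resizeable VMO mapping; GetNode() indexes directly into that mapping. Note
// that the returned pointer may be invalidated if the node map is later grown.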
Inode* Allocator::GetNode(uint32_t node_index) {
    ZX_DEBUG_ASSERT(node_index < node_map_.size() / kBlobfsInodeSize);
    return &reinterpret_cast<Inode*>(node_map_.start())[node_index];
}

bool Allocator::CheckBlocksAllocated(uint64_t start_block, uint64_t end_block,
                                     uint64_t* out_first_unset) const {
    uint64_t unset_bit;
    bool allocated = block_map_.Get(start_block, end_block, &unset_bit);
    if (!allocated && out_first_unset != nullptr) {
        *out_first_unset = unset_bit;
    }
    return allocated;
}

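// Resizes the block bitmap to match the superblock: it is first reset to a
// bitmap-block-aligned size (a multiple of kBlobfsBlockBits) and then, if
// needed, shrunk to the exact data block count.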
zx_status_t Allocator::ResetBlockMapSize() {
    ZX_DEBUG_ASSERT(ReservedBlockCount() == 0);
    const auto info = space_manager_->Info();
    uint64_t new_size = info.data_block_count;
    if (new_size != block_map_.size()) {
        uint64_t rounded_size = BlockMapBlocks(info) * kBlobfsBlockBits;
        zx_status_t status = block_map_.Reset(rounded_size);
        if (status != ZX_OK) {
            return status;
        }

        if (new_size < rounded_size) {
            // If the requested block count is not a multiple of kBlobfsBlockBits,
            // shrink the bitmap down to the actual block count.
            status = block_map_.Shrink(new_size);
            if (status != ZX_OK) {
                return status;
            }
        }
    }
    return ZX_OK;
}

zx_status_t Allocator::ResetNodeMapSize() {
    ZX_DEBUG_ASSERT(ReservedNodeCount() == 0);
    const auto info = space_manager_->Info();
    uint64_t nodemap_size = kBlobfsInodeSize * info.inode_count;
    if (fbl::round_up(nodemap_size, kBlobfsBlockSize) != nodemap_size) {
        return ZX_ERR_BAD_STATE;
    }
    ZX_DEBUG_ASSERT(nodemap_size / kBlobfsBlockSize == NodeMapBlocks(info));

    if (nodemap_size > node_map_.size()) {
        zx_status_t status = node_map_.Grow(nodemap_size);
        if (status != ZX_OK) {
            return status;
        }
    } else if (nodemap_size < node_map_.size()) {
        zx_status_t status = node_map_.Shrink(nodemap_size);
        if (status != ZX_OK) {
            return status;
        }
    }
    return ZX_OK;
}

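// Reloads both allocation maps from disk. The bitmap and node-map VMOs are
// attached to the block device on first use; a single read transaction then
// fetches the block map and node map regions described by the superblock.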
zx_status_t Allocator::ResetFromStorage(fs::ReadTxn txn) {
    ZX_DEBUG_ASSERT(ReservedBlockCount() == 0);
    ZX_DEBUG_ASSERT(ReservedNodeCount() == 0);
    zx_status_t status;
    if (block_map_vmoid_ == VMOID_INVALID) {
        status = space_manager_->AttachVmo(block_map_.StorageUnsafe()->GetVmo(),
                                           &block_map_vmoid_);
        if (status != ZX_OK) {
            return status;
        }
    }
    if (node_map_vmoid_ == VMOID_INVALID) {
        status = space_manager_->AttachVmo(node_map_.vmo(), &node_map_vmoid_);
        if (status != ZX_OK) {
            return status;
        }
    }
    const auto info = space_manager_->Info();
    txn.Enqueue(block_map_vmoid_, 0, BlockMapStartBlock(info), BlockMapBlocks(info));
    txn.Enqueue(node_map_vmoid_, 0, NodeMapStartBlock(info), NodeMapBlocks(info));
    return txn.Transact();
}

const zx::vmo& Allocator::GetBlockMapVmo() const {
    return block_map_.StorageUnsafe()->GetVmo();
}

const zx::vmo& Allocator::GetNodeMapVmo() const {
    return node_map_.vmo();
}

zx_status_t Allocator::ReserveBlocks(uint64_t num_blocks,
                                     fbl::Vector<ReservedExtent>* out_extents) {
    zx_status_t status;
    uint64_t actual_blocks;

    // TODO(smklein): If we allocate blocks up to the end of the block map, extend, and continue
    // allocating, we'll create two extents where one would suffice.
    // If we knew how many reserved / allocated blocks existed we could resize ahead-of-time and
    // flatten this case, as an optimization.
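    //
    // Illustrative usage (hypothetical caller, not from this file): a blob that
    // needs |n| data blocks would reserve them roughly as
    //
    //   fbl::Vector<ReservedExtent> extents;
    //   if (allocator->ReserveBlocks(n, &extents) != ZX_OK) { /* out of space */ }
    //
    // On success, |extents| covers exactly |n| blocks, possibly split across
    // several extents.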

    if ((status = FindBlocks(0, num_blocks, out_extents, &actual_blocks)) != ZX_OK) {
        // If we have run out of blocks, attempt to add block slices via FVM.
        // The new 'hint' is the first location we could try to find blocks
        // after merely extending the allocation maps.
        uint64_t hint = block_map_.size() - fbl::min(num_blocks, block_map_.size());
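        // For example (illustrative numbers): with a 1000-block map and a
        // request for 200 blocks, |hint| is block 800, so the retry below
        // searches from there rather than rescanning from the start.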

        ZX_DEBUG_ASSERT(actual_blocks < num_blocks);
        num_blocks -= actual_blocks;

        if ((status = space_manager_->AddBlocks(num_blocks, &block_map_)) != ZX_OK ||
            (status = FindBlocks(hint, num_blocks, out_extents, &actual_blocks)) != ZX_OK) {
            LogAllocationFailure(num_blocks);
            out_extents->reset();
            return ZX_ERR_NO_SPACE;
        }
    }
    return ZX_OK;
}

void Allocator::MarkBlocksAllocated(const ReservedExtent& reserved_extent) {
    const Extent& extent = reserved_extent.extent();
    uint64_t start = extent.Start();
    uint64_t length = extent.Length();
    uint64_t end = start + length;

    ZX_DEBUG_ASSERT(CheckBlocksUnallocated(start, end));
    ZX_ASSERT(block_map_.Set(start, end) == ZX_OK);
}

void Allocator::FreeBlocks(const Extent& extent) {
    uint64_t start = extent.Start();
    uint64_t length = extent.Length();
    uint64_t end = start + length;

    ZX_DEBUG_ASSERT(CheckBlocksAllocated(start, end));
    ZX_ASSERT(block_map_.Clear(start, end) == ZX_OK);
}

zx_status_t Allocator::ReserveNodes(uint64_t num_nodes, fbl::Vector<ReservedNode>* out_nodes) {
    for (uint64_t i = 0; i < num_nodes; i++) {
        std::optional<ReservedNode> node = ReserveNode();
        if (!node) {
            out_nodes->reset();
            return ZX_ERR_NO_SPACE;
        }
        out_nodes->push_back(std::move(*node));
    }
    return ZX_OK;
}

std::optional<ReservedNode> Allocator::ReserveNode() {
    TRACE_DURATION("blobfs", "ReserveNode");
    uint32_t node_index;
    zx_status_t status;
    if ((status = FindNode(&node_index)) != ZX_OK) {
        // If we didn't find any free inodes, try adding more via FVM.
        if ((status = space_manager_->AddInodes(&node_map_)) != ZX_OK ||
            (status = FindNode(&node_index)) != ZX_OK) {
            return std::nullopt;
        }
    }

    ZX_DEBUG_ASSERT(!GetNode(node_index)->header.IsAllocated());
    std::optional<ReservedNode> node(ReservedNode(this, node_index));

    // We don't know where the next free node is, but we know that the
    // current implementation of the allocator always allocates the lowest available
    // node.
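    // Bumping the lower bound to |node_index + 1| is therefore safe: every node
    // at or below |node_index| is already allocated, reserved, or was just
    // handed out here.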
    SetFreeNodeLowerBound(node_index + 1);
    return node;
}

void Allocator::MarkInodeAllocated(const ReservedNode& node) {
    Inode* mapped_inode = GetNode(node.index());
    mapped_inode->header.flags = kBlobFlagAllocated;
    mapped_inode->header.next_node = 0;
}

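// Links the reserved node into an existing node chain: the previous node's
// |next_node| is pointed at this container, and the container records
// |previous_node|, so the list is linked in both directions.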
void Allocator::MarkContainerNodeAllocated(const ReservedNode& node, uint32_t previous_node) {
    const uint32_t index = node.index();
    GetNode(previous_node)->header.next_node = index;
    ExtentContainer* container = GetNode(index)->AsExtentContainer();
    container->header.flags = kBlobFlagAllocated | kBlobFlagExtentContainer;
    container->header.next_node = 0;
    container->previous_node = previous_node;
    container->extent_count = 0;
}

void Allocator::FreeNode(uint32_t node_index) {
    SetFreeNodeLowerBoundIfSmallest(node_index);
    GetNode(node_index)->header.flags = 0;
}

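// Returns true iff every block in [start_block, end_block) is free: Find() only
// succeeds if it can locate a run of unset bits spanning the entire range.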
bool Allocator::CheckBlocksUnallocated(uint64_t start_block, uint64_t end_block) const {
    ZX_DEBUG_ASSERT(end_block > start_block);
    uint64_t blkno_out;
    return block_map_.Find(false, start_block, end_block, end_block - start_block,
                           &blkno_out) == ZX_OK;
}

bool Allocator::FindUnallocatedExtent(uint64_t start, uint64_t block_length,
                                      uint64_t* out_start, uint64_t* out_block_length) {
    bool restart = false;
    // Constraint: No contiguous run which can extend beyond the block
    // bitmap.
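    // (Illustrative numbers: with a 100-block bitmap and start == 90, a request
    // for 64 blocks is clamped to 10 so the scan below stays in bounds.)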
    block_length = fbl::min(block_length, block_map_.size() - start);
    uint64_t first_already_allocated;
    if (!block_map_.Scan(start, start + block_length, false, &first_already_allocated)) {
        // Part of [start, start + block_length) is already allocated.
        if (first_already_allocated == start) {
            // Jump past as much of the allocated region as possible,
            // and then restart searching for more free blocks.
            uint64_t first_free;
            if (block_map_.Scan(start, start + block_length, true, &first_free)) {
                // All bits are allocated; jump past this entire portion
                // of allocated blocks.
                start += block_length;
            } else {
                // Not all blocks are allocated; jump to the first free block we can find.
                ZX_DEBUG_ASSERT(first_free > start);
                start = first_free;
            }
            // We recommend restarting the search in this case because
            // although there was a prefix collision, the suffix of this
            // region may be followed by additional free blocks.
            restart = true;
        } else {
            // Since |start| is free, we'll try allocating from as much of this region
            // as we can until we hit previously-allocated blocks.
            ZX_DEBUG_ASSERT(first_already_allocated > start);
            block_length = first_already_allocated - start;
        }
    }

    *out_start = start;
    *out_block_length = block_length;
    return restart;
}

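// Carves in-flight reservations out of the free window [start, start + block_length),
// pushing any purely free prefix onto |out_extents|. Returns true if the window
// collided with a reservation, in which case the caller should restart its
// search from the updated |out_start|.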
bool Allocator::MunchUnreservedExtents(bitmap::RleBitmap::const_iterator reserved_iterator,
                                       uint64_t remaining_blocks,
                                       uint64_t start,
                                       uint64_t block_length,
                                       fbl::Vector<ReservedExtent>* out_extents,
                                       bitmap::RleBitmap::const_iterator* out_reserved_iterator,
                                       uint64_t* out_remaining_blocks,
                                       uint64_t* out_start,
                                       uint64_t* out_block_length) {
    bool collision = false;

    const uint64_t start_max = start + block_length;

    // There are remaining in-flight reserved blocks we might collide with;
    // verify this allocation is not being held by another write operation.
    while ((start < start_max) &&
           (block_length != 0) &&
           (reserved_iterator != ReservedBlocksCend())) {
        // We should only be considering blocks which are not allocated.
        ZX_DEBUG_ASSERT(start < start + block_length);
        ZX_DEBUG_ASSERT(block_map_.Scan(start, start + block_length, false));

        if (reserved_iterator->end() <= start) {
            // The reserved iterator is lagging behind this region.
            ZX_DEBUG_ASSERT(reserved_iterator != ReservedBlocksCend());
            reserved_iterator++;
        } else if (start + block_length <= reserved_iterator->start()) {
            // The remaining reserved blocks occur after this free region;
            // this allocation doesn't collide.
            break;
        } else {
            // The reserved region ends at/after the start of the allocation.
            // The reserved region starts before the end of the allocation.
            //
            // This implies a collision exists.
            collision = true;
            if (start >= reserved_iterator->start() &&
                start + block_length <= reserved_iterator->end()) {
                // The collision is total; move past the entire reserved region.
                start = reserved_iterator->end();
                block_length = 0;
                break;
            }
            if (start < reserved_iterator->start()) {
                // Free Prefix: Although the observed range overlaps with a
                // reservation, it includes a prefix which is free from
                // overlap.
                //
                // Take as much of the proposed allocation as possible *before* the reservation.
                Extent extent(start, static_cast<BlockCountType>(reserved_iterator->start() -
                                                                 start));
                ZX_DEBUG_ASSERT(block_map_.Scan(extent.Start(),
                                                extent.Start() + extent.Length(),
                                                false));
                ZX_DEBUG_ASSERT(block_length > extent.Length());
                // Jump past the end of this reservation.
                uint64_t reserved_length = reserved_iterator->end() - reserved_iterator->start();
                if (block_length > extent.Length() + reserved_length) {
                    block_length -= extent.Length() + reserved_length;
                } else {
                    block_length = 0;
                }
                start = reserved_iterator->end();
                remaining_blocks -= extent.Length();
                out_extents->push_back(ReservedExtent(this, std::move(extent)));
                reserved_iterator = ReservedBlocksCbegin();
            } else {
                // Free Suffix: The observed range overlaps with a
                // reservation, but not entirely.
                //
                // Jump to the end of the reservation, as free space exists
                // there.
                ZX_DEBUG_ASSERT(start + block_length > reserved_iterator->end());
                block_length = (start + block_length) - reserved_iterator->end();
                start = reserved_iterator->end();
            }
        }
    }

    *out_remaining_blocks = remaining_blocks;
    *out_reserved_iterator = reserved_iterator;
    *out_start = start;
    *out_block_length = block_length;
    return collision;
}

zx_status_t Allocator::FindBlocks(uint64_t start, uint64_t num_blocks,
                                  fbl::Vector<ReservedExtent>* out_extents,
                                  uint64_t* out_actual_blocks) {
    // Using a single iterator over the reserved allocation map lets us
    // avoid re-scanning portions of the reserved map. This is possible
    // because the |reserved_blocks_| map should be immutable
    // for the duration of this method, unless we actually find blocks, at
    // which point the iterator is reset.
    auto reserved_iterator = ReservedBlocksCbegin();

    uint64_t remaining_blocks = num_blocks;
    while (remaining_blocks != 0) {
        // Look for |block_length| contiguous free blocks.
        if (start >= block_map_.size()) {
            *out_actual_blocks = num_blocks - remaining_blocks;
            return ZX_ERR_NO_SPACE;
        }
        // Constraint: No contiguous run longer than the maximum permitted
        // extent.
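        // Extent lengths are stored as BlockCountType (see the cast when the
        // Extent is constructed below), so a single extent can never describe
        // more than kBlockCountMax blocks.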
        uint64_t block_length = fbl::min(remaining_blocks, kBlockCountMax);

        bool restart_search = FindUnallocatedExtent(start, block_length,
                                                    &start, &block_length);
        if (restart_search) {
            continue;
        }

        // [start, start + block_length) is now a valid region of free blocks.
        //
        // Take the subset of this range that doesn't intersect with reserved blocks,
        // and add it to our extent list.
        restart_search = MunchUnreservedExtents(reserved_iterator, remaining_blocks, start,
                                                block_length, out_extents, &reserved_iterator,
                                                &remaining_blocks, &start, &block_length);
        if (restart_search) {
            continue;
        }

        if (block_length != 0) {
            // The remainder of this window exists and collides with neither
            // the reservation map nor the committed blocks.
            Extent extent(start, static_cast<BlockCountType>(block_length));
            ZX_DEBUG_ASSERT(block_map_.Scan(extent.Start(),
                                            extent.Start() + extent.Length(),
                                            false));
            start += extent.Length();
            remaining_blocks -= extent.Length();
            out_extents->push_back(ReservedExtent(this, std::move(extent)));
            reserved_iterator = ReservedBlocksCbegin();
        }
    }

    *out_actual_blocks = num_blocks;
    return ZX_OK;
}

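// Scans linearly from the free-node lower bound for an inode that is neither
// allocated on disk nor reserved by an in-flight operation.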
zx_status_t Allocator::FindNode(uint32_t* node_index_out) {
    uint32_t inode_count = static_cast<uint32_t>(space_manager_->Info().inode_count);
    for (uint32_t i = FreeNodeLowerBound(); i < inode_count; ++i) {
        if (!GetNode(i)->header.IsAllocated()) {
            // Found a free node, which is also not reserved.
            if (!IsNodeReserved(i)) {
                *node_index_out = i;
                return ZX_OK;
            }
        }
    }

    // There are no free nodes available. Setting the free node lower bound to
    // inode_count lets the next allocation fail fast, and it remains correct if
    // more nodes are added later, since new nodes appear at or beyond this bound.
    SetFreeNodeLowerBound(inode_count);
    return ZX_ERR_OUT_OF_RANGE;
}

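// Reports an allocation failure, counting both blocks committed on disk
// (alloc_block_count) and blocks held by in-flight reservations, so the
// fragmentation vs. over-allocation verdict reflects the actual free space.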
void Allocator::LogAllocationFailure(uint64_t num_blocks) const {
    const Superblock& info = space_manager_->Info();
    const uint64_t requested_bytes = num_blocks * info.block_size;
    const uint64_t total_bytes = info.data_block_count * info.block_size;
    const uint64_t persisted_used_bytes = info.alloc_block_count * info.block_size;
    const uint64_t pending_used_bytes = ReservedBlockCount() * info.block_size;
    const uint64_t used_bytes = persisted_used_bytes + pending_used_bytes;
    ZX_ASSERT_MSG(used_bytes <= total_bytes,
                  "blobfs using more bytes than available: %" PRIu64 " > %" PRIu64 "\n",
                  used_bytes, total_bytes);
    const uint64_t free_bytes = total_bytes - used_bytes;

    if (!log_allocation_failure_) {
        return;
    }

    FS_TRACE_ERROR("Blobfs has run out of space on persistent storage.\n");
    FS_TRACE_ERROR("    Could not allocate %" PRIu64 " bytes\n", requested_bytes);
    FS_TRACE_ERROR("    Total data bytes  : %" PRIu64 "\n", total_bytes);
    FS_TRACE_ERROR("    Used data bytes   : %" PRIu64 "\n", persisted_used_bytes);
    FS_TRACE_ERROR("    Preallocated bytes: %" PRIu64 "\n", pending_used_bytes);
    FS_TRACE_ERROR("    Free data bytes   : %" PRIu64 "\n", free_bytes);
    FS_TRACE_ERROR("    This allocation failure is the result of %s.\n",
                   requested_bytes <= free_bytes ? "fragmentation" : "over-allocation");
}

} // namespace blobfs