1 // Copyright 2017 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // This file describes the on-disk structure of Blobfs.
6
7 #pragma once
8
9 #include <digest/digest.h>
10 #include <digest/merkle-tree.h>
11 #include <fbl/algorithm.h>
12 #include <fbl/macros.h>
13 #include <zircon/types.h>
14
15 #include <assert.h>
16 #include <limits>
17 #include <stdbool.h>
18 #include <stdint.h>
19
20 #include <algorithm>
21
22 #ifdef __Fuchsia__
23 #include <zircon/syscalls.h>
24 #endif
25
26 // clang-format off
27
28 namespace blobfs {
29 constexpr uint64_t kBlobfsMagic0 = (0xac2153479e694d21ULL);
30 constexpr uint64_t kBlobfsMagic1 = (0x985000d4d4d3d314ULL);
31 constexpr uint32_t kBlobfsVersion = 0x00000007;
32
33 constexpr uint32_t kBlobFlagClean = 1;
34 constexpr uint32_t kBlobFlagDirty = 2;
35 constexpr uint32_t kBlobFlagFVM = 4;
36 constexpr uint32_t kBlobfsBlockSize = 8192;
37 constexpr uint32_t kBlobfsBlockBits = (kBlobfsBlockSize * 8);
38 constexpr uint32_t kBlobfsBlockMapStart = 1;
39 constexpr uint32_t kBlobfsInodeSize = 64;
40 constexpr uint32_t kBlobfsInodesPerBlock = (kBlobfsBlockSize / kBlobfsInodeSize);
41
42 constexpr size_t kFVMBlockMapStart = 0x10000;
43 constexpr size_t kFVMNodeMapStart = 0x20000;
44 constexpr size_t kFVMJournalStart = 0x30000;
45 constexpr size_t kFVMDataStart = 0x40000;
46
47 // Number of metadata blocks allocated for the whole journal: 1 info block.
48 constexpr uint32_t kJournalMetadataBlocks = 1;
49 // Number of metadata blocks allocated for each entry: 2 (header block, commit block).
50 constexpr uint32_t kEntryMetadataBlocks = 2;
51 // Maximum number of data blocks possible for a single entry:
52 // - Blobfs Superblock
53 // - Inode Table Blocks
54 // - Block Bitmap Blocks
55 // TODO(ZX-3076): Calculate the actual upper bound here; this number is not
56 // necessarily considering the worst cases of fragmentation.
57 constexpr uint32_t kMaxEntryDataBlocks = 64;
58 // Minimum possible size for the journal, allowing the maximum size for one entry.
59 constexpr size_t kMinimumJournalBlocks = kJournalMetadataBlocks + kEntryMetadataBlocks +
60 kMaxEntryDataBlocks;
61 constexpr size_t kDefaultJournalBlocks = std::max(kMinimumJournalBlocks, static_cast<size_t>(256));
62
63 constexpr uint64_t kBlobfsDefaultInodeCount = 32768;
64
65 constexpr size_t kMinimumDataBlocks = 2;
66
67 #ifdef __Fuchsia__
68 // Use a heuristics-based approach based on physical RAM size to
69 // determine the size of the writeback buffer.
70 //
71 // Currently, we set the writeback buffer size to 2% of physical
72 // memory.
73 //
74 // Should be invoked with caution; the size of the system's total
75 // memory may eventually change after boot.
WriteBufferSize()76 inline size_t WriteBufferSize() {
77 return fbl::round_up((zx_system_get_physmem() * 2) / 100, kBlobfsBlockSize);
78 }
79 #endif
80
81 constexpr uint64_t kJournalMagic = (0x626c6f626a726e6cULL);
82
83 // Notes:
84 // - block 0 is always allocated
85 // - inode 0 is never used, should be marked allocated but ignored
86
87 struct Superblock {
88 uint64_t magic0;
89 uint64_t magic1;
90 uint32_t version;
91 uint32_t flags;
92 uint32_t block_size; // 8K typical.
93 uint64_t data_block_count; // Number of data blocks in this area.
94 uint64_t journal_block_count; // Number of journal blocks in this area.
95 uint64_t inode_count; // Number of blobs in this area.
96 uint64_t alloc_block_count; // Total number of allocated blocks.
97 uint64_t alloc_inode_count; // Total number of allocated blobs and container nodes.
98 uint64_t blob_header_next; // Block containing next blobfs, or zero if this is the last one.
99 // The following fields are only valid with (flags & kBlobFlagFVM):
100 uint64_t slice_size; // Underlying slice size.
101 uint64_t vslice_count; // Number of underlying slices.
102 uint32_t abm_slices; // Slices allocated to block bitmap.
103 uint32_t ino_slices; // Slices allocated to node map.
104 uint32_t dat_slices; // Slices allocated to file data section.
105 uint32_t journal_slices; // Slices allocated to journal section.
106 };
107
108 static_assert(sizeof(Superblock) <= kBlobfsBlockSize, "Invalid blobfs superblock size");
109
110 // TODO(ZX-2729): Use counter instead of timestamp (for journal info block and entries).
111 struct JournalInfo {
112 uint64_t magic;
113 uint64_t start_block; // Block at which the first journal entry starts.
114 uint64_t num_blocks; // Number of valid blocks currently contained in the journal.
115 uint64_t timestamp; // Timestamp (in ticks) at which the info block was last written.
116 uint32_t checksum; // crc32 checksum of the preceding contents of the info block.
117 };
118
119 static_assert(sizeof(JournalInfo) <= kBlobfsBlockSize, "Journal info size is too large");
120
121 struct HeaderBlock {
122 uint64_t magic;
123 uint64_t timestamp;
124 uint64_t reserved;
125 uint64_t num_blocks;
126 uint64_t target_blocks[kMaxEntryDataBlocks];
127 };
128
129 static_assert(sizeof(HeaderBlock) <= kBlobfsBlockSize, "HeaderBlock size is too large");
130
131 struct CommitBlock {
132 uint64_t magic;
133 uint64_t timestamp; // Timestamp (in ticks) at which the journal entry was written.
134 uint32_t checksum; // crc32 checksum of all preceding blocks in the entry.
135 };
136
137 static_assert(sizeof(CommitBlock) <= kBlobfsBlockSize, "CommitBlock size is too large");
138
BlockMapStartBlock(const Superblock & info)139 constexpr uint64_t BlockMapStartBlock(const Superblock& info) {
140 if (info.flags & kBlobFlagFVM) {
141 return kFVMBlockMapStart;
142 } else {
143 return kBlobfsBlockMapStart;
144 }
145 }
146
BlockMapBlocks(const Superblock & info)147 constexpr uint64_t BlockMapBlocks(const Superblock& info) {
148 return fbl::round_up(info.data_block_count, kBlobfsBlockBits) / kBlobfsBlockBits;
149 }
150
NodeMapStartBlock(const Superblock & info)151 constexpr uint64_t NodeMapStartBlock(const Superblock& info) {
152 // Node map immediately follows the block map
153 if (info.flags & kBlobFlagFVM) {
154 return kFVMNodeMapStart;
155 } else {
156 // Node map immediately follows the block map.
157 return BlockMapStartBlock(info) + BlockMapBlocks(info);
158 }
159 }
160
NodeBitmapBlocks(const Superblock & info)161 constexpr uint64_t NodeBitmapBlocks(const Superblock& info) {
162 return fbl::round_up(info.inode_count, kBlobfsBlockBits) / kBlobfsBlockBits;
163 }
164
NodeMapBlocks(const Superblock & info)165 constexpr uint64_t NodeMapBlocks(const Superblock& info) {
166 return fbl::round_up(info.inode_count, kBlobfsInodesPerBlock) / kBlobfsInodesPerBlock;
167 }
168
JournalStartBlock(const Superblock & info)169 constexpr uint64_t JournalStartBlock(const Superblock& info) {
170 if (info.flags & kBlobFlagFVM) {
171 return kFVMJournalStart;
172 }
173
174 // Journal immediately follows the node map.
175 return NodeMapStartBlock(info) + NodeMapBlocks(info);
176 }
177
JournalBlocks(const Superblock & info)178 constexpr uint64_t JournalBlocks(const Superblock& info) {
179 return info.journal_block_count;
180 }
181
DataStartBlock(const Superblock & info)182 constexpr uint64_t DataStartBlock(const Superblock& info) {
183 if (info.flags & kBlobFlagFVM) {
184 return kFVMDataStart;
185 }
186
187 // Data immediately follows the journal.
188 return JournalStartBlock(info) + JournalBlocks(info);
189 }
190
DataBlocks(const Superblock & info)191 constexpr uint64_t DataBlocks(const Superblock& info) {
192 return info.data_block_count;
193 }
194
TotalBlocks(const Superblock & info)195 constexpr uint64_t TotalBlocks(const Superblock& info) {
196 return BlockMapStartBlock(info) + BlockMapBlocks(info) + NodeMapBlocks(info)
197 + JournalBlocks(info) + DataBlocks(info);
198 }
199
200 // States of 'Blob' identified via start block.
201 constexpr uint64_t kStartBlockMinimum = 1; // Smallest 'data' block possible.
202
203 using digest::Digest;
204
205 typedef uint64_t BlockOffsetType;
206 constexpr size_t kBlockOffsetBits = 48;
207 constexpr BlockOffsetType kBlockOffsetMax = (1LLU << kBlockOffsetBits) - 1;
208 constexpr uint64_t kBlockOffsetMask = kBlockOffsetMax;
209
210 typedef uint16_t BlockCountType;
211 constexpr size_t kBlockCountBits = 16;
212 constexpr size_t kBlockCountMax = std::numeric_limits<BlockCountType>::max();
213 constexpr uint64_t kBlockCountMask = kBlockCountMax << kBlockOffsetBits;
214
215 class Extent {
216 public:
Extent()217 Extent() : data_(0) {};
Extent(BlockOffsetType start,BlockCountType length)218 Extent(BlockOffsetType start, BlockCountType length) : data_(0) {
219 SetStart(start);
220 SetLength(length);
221 }
222
Start()223 BlockOffsetType Start() const {
224 return data_ & kBlockOffsetMask;
225 }
226
SetStart(BlockOffsetType start)227 void SetStart(BlockOffsetType start) {
228 ZX_DEBUG_ASSERT(start <= kBlockOffsetMax);
229 data_ = (data_ & ~kBlockOffsetMask) | (start & kBlockOffsetMask);
230 }
231
Length()232 BlockCountType Length() const {
233 return static_cast<BlockCountType>((data_ & kBlockCountMask) >> kBlockOffsetBits);
234 }
235
SetLength(BlockCountType length)236 void SetLength(BlockCountType length) {
237 data_ = (data_ & ~kBlockCountMask) | ((length & kBlockCountMax) << kBlockOffsetBits);
238 }
239
240 bool operator==(const Extent& rhs) const {
241 return Start() == rhs.Start() && Length() == rhs.Length();
242 }
243
244 private:
245 uint64_t data_;
246 };
247
248 static_assert(sizeof(Extent) == sizeof(uint64_t), "Extent class should only contain data");
249
250 // The number of extents within a single blob.
251 typedef uint16_t ExtentCountType;
252
253 // The largest number of extents which can compose a blob.
254 constexpr size_t kMaxBlobExtents = std::numeric_limits<ExtentCountType>::max();
255
256 // Identifies that the node is allocated.
257 // Both inodes and extent containers can be allocated.
258 constexpr uint16_t kBlobFlagAllocated = 1 << 0;
259
260 // Identifies that the on-disk storage of the blob is LZ4 compressed.
261 constexpr uint16_t kBlobFlagLZ4Compressed = 1 << 1;
262
263 // Identifies that this node is a container for extents.
264 constexpr uint16_t kBlobFlagExtentContainer = 1 << 2;
265
266 // The number of extents within a normal inode.
267 constexpr uint32_t kInlineMaxExtents = 1;
268 // The number of extents within an extent container node.
269 constexpr uint32_t kContainerMaxExtents = 6;
270
271 struct NodePrelude {
272 uint16_t flags;
273 uint16_t version;
274 // The next node containing this blob's extents.
275 // Zero if there are no more extents.
276 uint32_t next_node;
277
IsAllocatedNodePrelude278 bool IsAllocated() const {
279 return flags & kBlobFlagAllocated;
280 }
281
IsExtentContainerNodePrelude282 bool IsExtentContainer() const {
283 return flags & kBlobFlagExtentContainer;
284 }
285 };
286
287 struct ExtentContainer;
288
289 struct Inode {
290 NodePrelude header;
291 uint8_t merkle_root_hash[Digest::kLength];
292 uint64_t blob_size;
293
294 // The total number of Blocks used to represent this blob.
295 uint32_t block_count;
296 // The total number of Extent objects necessary to represent this blob.
297 ExtentCountType extent_count;
298 uint16_t reserved;
299 Extent extents[kInlineMaxExtents];
300
AsExtentContainerInode301 ExtentContainer* AsExtentContainer() {
302 return reinterpret_cast<ExtentContainer*>(this);
303 }
304 };
305
306 struct ExtentContainer {
307 NodePrelude header;
308 // The map index of the previous node.
309 uint32_t previous_node;
310 // The number of extents within this container.
311 ExtentCountType extent_count;
312 uint16_t reserved;
313 Extent extents[kContainerMaxExtents];
314 };
315
316 static_assert(sizeof(Inode) == sizeof(ExtentContainer),
317 "Extent nodes must be as large as inodes");
318 static_assert(sizeof(Inode) == kBlobfsInodeSize,
319 "Blobfs Inode size is wrong");
320 static_assert(kBlobfsBlockSize % kBlobfsInodeSize == 0,
321 "Blobfs Inodes should fit cleanly within a blobfs block");
322
323 // Number of blocks reserved for the blob itself
BlobDataBlocks(const Inode & blobNode)324 constexpr uint64_t BlobDataBlocks(const Inode& blobNode) {
325 return fbl::round_up(blobNode.blob_size, kBlobfsBlockSize) / kBlobfsBlockSize;
326 }
327
328 } // namespace blobfs
329