1 // Copyright 2017 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // This file describes the on-disk structure of Blobfs.
6 
7 #pragma once
8 
9 #include <digest/digest.h>
10 #include <digest/merkle-tree.h>
11 #include <fbl/algorithm.h>
12 #include <fbl/macros.h>
13 #include <zircon/types.h>
14 
15 #include <assert.h>
16 #include <limits>
17 #include <stdbool.h>
18 #include <stdint.h>
19 
20 #include <algorithm>
21 
22 #ifdef __Fuchsia__
23 #include <zircon/syscalls.h>
24 #endif
25 
26 // clang-format off
27 
28 namespace blobfs {
29 constexpr uint64_t kBlobfsMagic0  = (0xac2153479e694d21ULL);
30 constexpr uint64_t kBlobfsMagic1  = (0x985000d4d4d3d314ULL);
31 constexpr uint32_t kBlobfsVersion = 0x00000007;
32 
33 constexpr uint32_t kBlobFlagClean        = 1;
34 constexpr uint32_t kBlobFlagDirty        = 2;
35 constexpr uint32_t kBlobFlagFVM          = 4;
36 constexpr uint32_t kBlobfsBlockSize      = 8192;
37 constexpr uint32_t kBlobfsBlockBits      = (kBlobfsBlockSize * 8);
38 constexpr uint32_t kBlobfsBlockMapStart  = 1;
39 constexpr uint32_t kBlobfsInodeSize      = 64;
40 constexpr uint32_t kBlobfsInodesPerBlock = (kBlobfsBlockSize / kBlobfsInodeSize);
41 
42 constexpr size_t kFVMBlockMapStart  = 0x10000;
43 constexpr size_t kFVMNodeMapStart   = 0x20000;
44 constexpr size_t kFVMJournalStart   = 0x30000;
45 constexpr size_t kFVMDataStart      = 0x40000;
46 
47 // Number of metadata blocks allocated for the whole journal: 1 info block.
48 constexpr uint32_t kJournalMetadataBlocks = 1;
49 // Number of metadata blocks allocated for each entry: 2 (header block, commit block).
50 constexpr uint32_t kEntryMetadataBlocks = 2;
51 // Maximum number of data blocks possible for a single entry:
52 // - Blobfs Superblock
53 // - Inode Table Blocks
54 // - Block Bitmap Blocks
55 // TODO(ZX-3076): Calculate the actual upper bound here; this number is not
56 // necessarily considering the worst cases of fragmentation.
57 constexpr uint32_t kMaxEntryDataBlocks = 64;
58 // Minimum possible size for the journal, allowing the maximum size for one entry.
59 constexpr size_t kMinimumJournalBlocks = kJournalMetadataBlocks + kEntryMetadataBlocks +
60                                          kMaxEntryDataBlocks;
61 constexpr size_t kDefaultJournalBlocks = std::max(kMinimumJournalBlocks, static_cast<size_t>(256));
62 
63 constexpr uint64_t kBlobfsDefaultInodeCount = 32768;
64 
65 constexpr size_t kMinimumDataBlocks = 2;
66 
67 #ifdef __Fuchsia__
68 // Use a heuristics-based approach based on physical RAM size to
69 // determine the size of the writeback buffer.
70 //
71 // Currently, we set the writeback buffer size to 2% of physical
72 // memory.
73 //
74 // Should be invoked with caution; the size of the system's total
75 // memory may eventually change after boot.
WriteBufferSize()76 inline size_t WriteBufferSize() {
77     return fbl::round_up((zx_system_get_physmem() * 2) / 100, kBlobfsBlockSize);
78 }
79 #endif
80 
81 constexpr uint64_t kJournalMagic = (0x626c6f626a726e6cULL);
82 
83 // Notes:
84 // - block 0 is always allocated
85 // - inode 0 is never used, should be marked allocated but ignored
86 
87 struct Superblock {
88     uint64_t magic0;
89     uint64_t magic1;
90     uint32_t version;
91     uint32_t flags;
92     uint32_t block_size;          // 8K typical.
93     uint64_t data_block_count;    // Number of data blocks in this area.
94     uint64_t journal_block_count; // Number of journal blocks in this area.
95     uint64_t inode_count;         // Number of blobs in this area.
96     uint64_t alloc_block_count;   // Total number of allocated blocks.
97     uint64_t alloc_inode_count;   // Total number of allocated blobs and container nodes.
98     uint64_t blob_header_next;    // Block containing next blobfs, or zero if this is the last one.
99     // The following fields are only valid with (flags & kBlobFlagFVM):
100     uint64_t slice_size;          // Underlying slice size.
101     uint64_t vslice_count;        // Number of underlying slices.
102     uint32_t abm_slices;          // Slices allocated to block bitmap.
103     uint32_t ino_slices;          // Slices allocated to node map.
104     uint32_t dat_slices;          // Slices allocated to file data section.
105     uint32_t journal_slices;      // Slices allocated to journal section.
106 };
107 
108 static_assert(sizeof(Superblock) <= kBlobfsBlockSize, "Invalid blobfs superblock size");
109 
110 // TODO(ZX-2729): Use counter instead of timestamp (for journal info block and entries).
111 struct JournalInfo {
112     uint64_t magic;
113     uint64_t start_block; // Block at which the first journal entry starts.
114     uint64_t num_blocks; // Number of valid blocks currently contained in the journal.
115     uint64_t timestamp; // Timestamp (in ticks) at which the info block was last written.
116     uint32_t checksum; // crc32 checksum of the preceding contents of the info block.
117 };
118 
119 static_assert(sizeof(JournalInfo) <= kBlobfsBlockSize, "Journal info size is too large");
120 
121 struct HeaderBlock {
122     uint64_t magic;
123     uint64_t timestamp;
124     uint64_t reserved;
125     uint64_t num_blocks;
126     uint64_t target_blocks[kMaxEntryDataBlocks];
127 };
128 
129 static_assert(sizeof(HeaderBlock) <= kBlobfsBlockSize, "HeaderBlock size is too large");
130 
131 struct CommitBlock {
132     uint64_t magic;
133     uint64_t timestamp; // Timestamp (in ticks) at which the journal entry was written.
134     uint32_t checksum; // crc32 checksum of all preceding blocks in the entry.
135 };
136 
137 static_assert(sizeof(CommitBlock) <= kBlobfsBlockSize, "CommitBlock size is too large");
138 
BlockMapStartBlock(const Superblock & info)139 constexpr uint64_t BlockMapStartBlock(const Superblock& info) {
140     if (info.flags & kBlobFlagFVM) {
141         return kFVMBlockMapStart;
142     } else {
143         return kBlobfsBlockMapStart;
144     }
145 }
146 
BlockMapBlocks(const Superblock & info)147 constexpr uint64_t BlockMapBlocks(const Superblock& info) {
148     return fbl::round_up(info.data_block_count, kBlobfsBlockBits) / kBlobfsBlockBits;
149 }
150 
NodeMapStartBlock(const Superblock & info)151 constexpr uint64_t NodeMapStartBlock(const Superblock& info) {
152     // Node map immediately follows the block map
153     if (info.flags & kBlobFlagFVM) {
154         return kFVMNodeMapStart;
155     } else {
156         // Node map immediately follows the block map.
157         return BlockMapStartBlock(info) + BlockMapBlocks(info);
158     }
159 }
160 
NodeBitmapBlocks(const Superblock & info)161 constexpr uint64_t NodeBitmapBlocks(const Superblock& info) {
162     return fbl::round_up(info.inode_count, kBlobfsBlockBits) / kBlobfsBlockBits;
163 }
164 
NodeMapBlocks(const Superblock & info)165 constexpr uint64_t NodeMapBlocks(const Superblock& info) {
166     return fbl::round_up(info.inode_count, kBlobfsInodesPerBlock) / kBlobfsInodesPerBlock;
167 }
168 
JournalStartBlock(const Superblock & info)169 constexpr uint64_t JournalStartBlock(const Superblock& info) {
170     if (info.flags & kBlobFlagFVM) {
171         return kFVMJournalStart;
172     }
173 
174     // Journal immediately follows the node map.
175     return NodeMapStartBlock(info) + NodeMapBlocks(info);
176 }
177 
JournalBlocks(const Superblock & info)178 constexpr uint64_t JournalBlocks(const Superblock& info) {
179     return info.journal_block_count;
180 }
181 
DataStartBlock(const Superblock & info)182 constexpr uint64_t DataStartBlock(const Superblock& info) {
183     if (info.flags & kBlobFlagFVM) {
184         return kFVMDataStart;
185     }
186 
187     // Data immediately follows the journal.
188     return JournalStartBlock(info) + JournalBlocks(info);
189 }
190 
DataBlocks(const Superblock & info)191 constexpr uint64_t DataBlocks(const Superblock& info) {
192     return info.data_block_count;
193 }
194 
TotalBlocks(const Superblock & info)195 constexpr uint64_t TotalBlocks(const Superblock& info) {
196     return BlockMapStartBlock(info) + BlockMapBlocks(info) + NodeMapBlocks(info)
197            + JournalBlocks(info) + DataBlocks(info);
198 }
199 
200 // States of 'Blob' identified via start block.
201 constexpr uint64_t kStartBlockMinimum  = 1; // Smallest 'data' block possible.
202 
203 using digest::Digest;
204 
205 typedef uint64_t BlockOffsetType;
206 constexpr size_t kBlockOffsetBits = 48;
207 constexpr BlockOffsetType kBlockOffsetMax = (1LLU << kBlockOffsetBits) - 1;
208 constexpr uint64_t kBlockOffsetMask = kBlockOffsetMax;
209 
210 typedef uint16_t BlockCountType;
211 constexpr size_t kBlockCountBits = 16;
212 constexpr size_t kBlockCountMax = std::numeric_limits<BlockCountType>::max();
213 constexpr uint64_t kBlockCountMask = kBlockCountMax << kBlockOffsetBits;
214 
215 class Extent {
216 public:
Extent()217     Extent() : data_(0) {};
Extent(BlockOffsetType start,BlockCountType length)218     Extent(BlockOffsetType start, BlockCountType length) : data_(0) {
219         SetStart(start);
220         SetLength(length);
221     }
222 
Start()223     BlockOffsetType Start() const {
224         return data_ & kBlockOffsetMask;
225     }
226 
SetStart(BlockOffsetType start)227     void SetStart(BlockOffsetType start) {
228         ZX_DEBUG_ASSERT(start <= kBlockOffsetMax);
229         data_ = (data_ & ~kBlockOffsetMask) | (start & kBlockOffsetMask);
230     }
231 
Length()232     BlockCountType Length() const {
233         return static_cast<BlockCountType>((data_ & kBlockCountMask) >> kBlockOffsetBits);
234     }
235 
SetLength(BlockCountType length)236     void SetLength(BlockCountType length) {
237         data_ = (data_ & ~kBlockCountMask) | ((length & kBlockCountMax) << kBlockOffsetBits);
238     }
239 
240     bool operator==(const Extent& rhs) const {
241         return Start() == rhs.Start() && Length() == rhs.Length();
242     }
243 
244 private:
245     uint64_t data_;
246 };
247 
248 static_assert(sizeof(Extent) == sizeof(uint64_t), "Extent class should only contain data");
249 
250 // The number of extents within a single blob.
251 typedef uint16_t ExtentCountType;
252 
253 // The largest number of extents which can compose a blob.
254 constexpr size_t kMaxBlobExtents = std::numeric_limits<ExtentCountType>::max();
255 
256 // Identifies that the node is allocated.
257 // Both inodes and extent containers can be allocated.
258 constexpr uint16_t kBlobFlagAllocated = 1 << 0;
259 
260 // Identifies that the on-disk storage of the blob is LZ4 compressed.
261 constexpr uint16_t kBlobFlagLZ4Compressed = 1 << 1;
262 
263 // Identifies that this node is a container for extents.
264 constexpr uint16_t kBlobFlagExtentContainer = 1 << 2;
265 
266 // The number of extents within a normal inode.
267 constexpr uint32_t kInlineMaxExtents = 1;
268 // The number of extents within an extent container node.
269 constexpr uint32_t kContainerMaxExtents = 6;
270 
271 struct NodePrelude {
272     uint16_t flags;
273     uint16_t version;
274     // The next node containing this blob's extents.
275     // Zero if there are no more extents.
276     uint32_t next_node;
277 
IsAllocatedNodePrelude278     bool IsAllocated() const {
279         return flags & kBlobFlagAllocated;
280     }
281 
IsExtentContainerNodePrelude282     bool IsExtentContainer() const {
283         return flags & kBlobFlagExtentContainer;
284     }
285 };
286 
287 struct ExtentContainer;
288 
289 struct Inode {
290     NodePrelude header;
291     uint8_t  merkle_root_hash[Digest::kLength];
292     uint64_t blob_size;
293 
294     // The total number of Blocks used to represent this blob.
295     uint32_t block_count;
296     // The total number of Extent objects necessary to represent this blob.
297     ExtentCountType extent_count;
298     uint16_t reserved;
299     Extent extents[kInlineMaxExtents];
300 
AsExtentContainerInode301     ExtentContainer* AsExtentContainer() {
302         return reinterpret_cast<ExtentContainer*>(this);
303     }
304 };
305 
306 struct ExtentContainer {
307     NodePrelude header;
308     // The map index of the previous node.
309     uint32_t previous_node;
310     // The number of extents within this container.
311     ExtentCountType extent_count;
312     uint16_t reserved;
313     Extent extents[kContainerMaxExtents];
314 };
315 
316 static_assert(sizeof(Inode) == sizeof(ExtentContainer),
317               "Extent nodes must be as large as inodes");
318 static_assert(sizeof(Inode) == kBlobfsInodeSize,
319               "Blobfs Inode size is wrong");
320 static_assert(kBlobfsBlockSize % kBlobfsInodeSize == 0,
321               "Blobfs Inodes should fit cleanly within a blobfs block");
322 
323 // Number of blocks reserved for the blob itself
BlobDataBlocks(const Inode & blobNode)324 constexpr uint64_t BlobDataBlocks(const Inode& blobNode) {
325     return fbl::round_up(blobNode.blob_size, kBlobfsBlockSize) / kBlobfsBlockSize;
326 }
327 
328 } // namespace blobfs
329