1 // Copyright 2017 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <fcntl.h>
6 #include <inttypes.h>
7 #include <limits>
8 #include <stdarg.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/stat.h>
13 #include <unistd.h>
14
15 #include <digest/digest.h>
16 #include <digest/merkle-tree.h>
17 #include <fs/block-txn.h>
18 #include <fs/trace.h>
19
20 #ifdef __Fuchsia__
21 #include <fs/fvm.h>
22 #endif
23
24 #include <blobfs/common.h>
25
26 using digest::Digest;
27 using digest::MerkleTree;
28
29 namespace blobfs {
30
31 // Number of blocks reserved for the Merkle Tree
MerkleTreeBlocks(const Inode & blobNode)32 uint32_t MerkleTreeBlocks(const Inode& blobNode) {
33 uint64_t size_merkle = MerkleTree::GetTreeLength(blobNode.blob_size);
34 ZX_DEBUG_ASSERT(size_merkle <= std::numeric_limits<uint32_t>::max());
35 return fbl::round_up(static_cast<uint32_t>(size_merkle), kBlobfsBlockSize) / kBlobfsBlockSize;
36 }
37
38 // Sanity check the metadata for the blobfs, given a maximum number of
39 // available blocks.
CheckSuperblock(const Superblock * info,uint64_t max)40 zx_status_t CheckSuperblock(const Superblock* info, uint64_t max) {
41 if ((info->magic0 != kBlobfsMagic0) ||
42 (info->magic1 != kBlobfsMagic1)) {
43 FS_TRACE_ERROR("blobfs: bad magic\n");
44 return ZX_ERR_INVALID_ARGS;
45 }
46 if (info->version != kBlobfsVersion) {
47 FS_TRACE_ERROR("blobfs: FS Version: %08x. Driver version: %08x\n", info->version,
48 kBlobfsVersion);
49 return ZX_ERR_INVALID_ARGS;
50 }
51 if (info->block_size != kBlobfsBlockSize) {
52 FS_TRACE_ERROR("blobfs: bsz %u unsupported\n", info->block_size);
53 return ZX_ERR_INVALID_ARGS;
54 }
55 if ((info->flags & kBlobFlagFVM) == 0) {
56 if (TotalBlocks(*info) > max) {
57 FS_TRACE_ERROR("blobfs: too large for device\n");
58 return ZX_ERR_INVALID_ARGS;
59 }
60 } else {
61 const size_t blocks_per_slice = info->slice_size / info->block_size;
62
63 size_t abm_blocks_needed = BlockMapBlocks(*info);
64 size_t abm_blocks_allocated = info->abm_slices * blocks_per_slice;
65 if (abm_blocks_needed > abm_blocks_allocated) {
66 FS_TRACE_ERROR("blobfs: Not enough slices for block bitmap\n");
67 return ZX_ERR_INVALID_ARGS;
68 } else if (abm_blocks_allocated + BlockMapStartBlock(*info) >= NodeMapStartBlock(*info)) {
69 FS_TRACE_ERROR("blobfs: Block bitmap collides into node map\n");
70 return ZX_ERR_INVALID_ARGS;
71 }
72
73 size_t ino_blocks_needed = NodeMapBlocks(*info);
74 size_t ino_blocks_allocated = info->ino_slices * blocks_per_slice;
75 if (ino_blocks_needed > ino_blocks_allocated) {
76 FS_TRACE_ERROR("blobfs: Not enough slices for node map\n");
77 return ZX_ERR_INVALID_ARGS;
78 } else if (ino_blocks_allocated + NodeMapStartBlock(*info) >= DataStartBlock(*info)) {
79 FS_TRACE_ERROR("blobfs: Node bitmap collides into data blocks\n");
80 return ZX_ERR_INVALID_ARGS;
81 }
82
83 size_t dat_blocks_needed = DataBlocks(*info);
84 size_t dat_blocks_allocated = info->dat_slices * blocks_per_slice;
85 if (dat_blocks_needed < kStartBlockMinimum) {
86 FS_TRACE_ERROR("blobfs: Partition too small; no space left for data blocks\n");
87 return ZX_ERR_INVALID_ARGS;
88 } else if (dat_blocks_needed > dat_blocks_allocated) {
89 FS_TRACE_ERROR("blobfs: Not enough slices for data blocks\n");
90 return ZX_ERR_INVALID_ARGS;
91 } else if (dat_blocks_allocated + DataStartBlock(*info) >
92 std::numeric_limits<uint32_t>::max()) {
93 FS_TRACE_ERROR("blobfs: Data blocks overflow uint32\n");
94 return ZX_ERR_INVALID_ARGS;
95 }
96 }
97 if (info->blob_header_next != 0) {
98 FS_TRACE_ERROR("blobfs: linked blob headers not yet supported\n");
99 return ZX_ERR_INVALID_ARGS;
100 }
101 return ZX_OK;
102 }
103
GetBlockCount(int fd,uint64_t * out)104 zx_status_t GetBlockCount(int fd, uint64_t* out) {
105 #ifdef __Fuchsia__
106 block_info_t info;
107 ssize_t r;
108 if ((r = ioctl_block_get_info(fd, &info)) < 0) {
109 return static_cast<zx_status_t>(r);
110 }
111 *out = (info.block_size * info.block_count) / kBlobfsBlockSize;
112 #else
113 struct stat s;
114 if (fstat(fd, &s) < 0) {
115 return ZX_ERR_BAD_STATE;
116 }
117 *out = s.st_size / kBlobfsBlockSize;
118 #endif
119 return ZX_OK;
120 }
121
readblk(int fd,uint64_t bno,void * data)122 zx_status_t readblk(int fd, uint64_t bno, void* data) {
123 off_t off = bno * kBlobfsBlockSize;
124 if (lseek(fd, off, SEEK_SET) < 0) {
125 FS_TRACE_ERROR("blobfs: cannot seek to block %" PRIu64 "\n", bno);
126 return ZX_ERR_IO;
127 }
128 if (read(fd, data, kBlobfsBlockSize) != kBlobfsBlockSize) {
129 FS_TRACE_ERROR("blobfs: cannot read block %" PRIu64 "\n", bno);
130 return ZX_ERR_IO;
131 }
132 return ZX_OK;
133 }
134
writeblk(int fd,uint64_t bno,const void * data)135 zx_status_t writeblk(int fd, uint64_t bno, const void* data) {
136 off_t off = bno * kBlobfsBlockSize;
137 if (lseek(fd, off, SEEK_SET) < 0) {
138 FS_TRACE_ERROR("blobfs: cannot seek to block %" PRIu64 "\n", bno);
139 return ZX_ERR_IO;
140 }
141 if (write(fd, data, kBlobfsBlockSize) != kBlobfsBlockSize) {
142 FS_TRACE_ERROR("blobfs: cannot write block %" PRIu64 "\n", bno);
143 return ZX_ERR_IO;
144 }
145 return ZX_OK;
146 }
147
Mkfs(int fd,uint64_t block_count)148 int Mkfs(int fd, uint64_t block_count) {
149 uint64_t inodes = kBlobfsDefaultInodeCount;
150
151 Superblock info;
152 memset(&info, 0x00, sizeof(info));
153 info.magic0 = kBlobfsMagic0;
154 info.magic1 = kBlobfsMagic1;
155 info.version = kBlobfsVersion;
156 info.flags = kBlobFlagClean;
157 info.block_size = kBlobfsBlockSize;
158 //TODO(planders): Consider modifying the inode count if we are low on space.
159 // It doesn't make sense to have fewer data blocks than inodes.
160 info.inode_count = inodes;
161 info.alloc_block_count = 0;
162 info.alloc_inode_count = 0;
163 info.blob_header_next = 0; // TODO(smklein): Allow chaining
164
165 // Temporarily set the data_block_count to the total block_count so we can estimate the number
166 // of pre-data blocks.
167 info.data_block_count = block_count;
168
169 // The result of DataStartBlock(info) is based on the current value of info.data_block_count.
170 // As a result, the block bitmap may have slightly more space allocated than is necessary.
171 size_t usable_blocks = JournalStartBlock(info) < block_count
172 ? block_count - JournalStartBlock(info)
173 : 0;
174
175 // Determine allocation for the journal vs. data blocks based on the number of blocks remaining.
176 if (usable_blocks >= kDefaultJournalBlocks * 2) {
177 // Regular-sized partition, capable of fitting a data region
178 // at least as large as the journal. Give all excess blocks
179 // to the data region.
180 info.journal_block_count = kDefaultJournalBlocks;
181 info.data_block_count = usable_blocks - kDefaultJournalBlocks;
182 } else if (usable_blocks >= kMinimumDataBlocks + kMinimumJournalBlocks) {
183 // On smaller partitions, give both regions the minimum amount of space,
184 // and split the remainder. The choice of where to allocate the "remainder"
185 // is arbitrary.
186 const size_t remainder_blocks = usable_blocks -
187 (kMinimumDataBlocks + kMinimumJournalBlocks);
188 const size_t remainder_for_journal = remainder_blocks / 2;
189 const size_t remainder_for_data = remainder_blocks - remainder_for_journal;
190 info.journal_block_count = kMinimumJournalBlocks + remainder_for_journal;
191 info.data_block_count = kMinimumDataBlocks + remainder_for_data;
192 } else {
193 // Error, partition too small.
194 info.journal_block_count = 0;
195 info.data_block_count = 0;
196 }
197
198 #ifdef __Fuchsia__
199 fvm_info_t fvm_info;
200
201 if (ioctl_block_fvm_query(fd, &fvm_info) >= 0) {
202 info.slice_size = fvm_info.slice_size;
203 info.flags |= kBlobFlagFVM;
204
205 if (info.slice_size % kBlobfsBlockSize) {
206 FS_TRACE_ERROR("blobfs mkfs: Slice size not multiple of blobfs block\n");
207 return -1;
208 }
209
210 if (fs::fvm_reset_volume_slices(fd) != ZX_OK) {
211 FS_TRACE_ERROR("blobfs mkfs: Failed to reset slices\n");
212 return -1;
213 }
214
215 const size_t kBlocksPerSlice = info.slice_size / kBlobfsBlockSize;
216
217 extend_request_t request;
218 request.length = 1;
219 request.offset = kFVMBlockMapStart / kBlocksPerSlice;
220 if (ioctl_block_fvm_extend(fd, &request) < 0) {
221 FS_TRACE_ERROR("blobfs mkfs: Failed to allocate block map\n");
222 return -1;
223 }
224
225 request.offset = kFVMNodeMapStart / kBlocksPerSlice;
226 if (ioctl_block_fvm_extend(fd, &request) < 0) {
227 FS_TRACE_ERROR("blobfs mkfs: Failed to allocate node map\n");
228 return -1;
229 }
230
231 // Allocate the minimum number of journal blocks in FVM.
232 request.offset = kFVMJournalStart / kBlocksPerSlice;
233 request.length = fbl::round_up(kDefaultJournalBlocks, kBlocksPerSlice) / kBlocksPerSlice;
234 info.journal_slices = static_cast<uint32_t>(request.length);
235 if (ioctl_block_fvm_extend(fd, &request) < 0) {
236 FS_TRACE_ERROR("blobfs mkfs: Failed to allocate journal blocks\n");
237 return -1;
238 }
239
240 // Allocate the minimum number of data blocks in the FVM.
241 request.offset = kFVMDataStart / kBlocksPerSlice;
242 request.length = fbl::round_up(kMinimumDataBlocks, kBlocksPerSlice) / kBlocksPerSlice;
243 info.dat_slices = static_cast<uint32_t>(request.length);
244 if (ioctl_block_fvm_extend(fd, &request) < 0) {
245 FS_TRACE_ERROR("blobfs mkfs: Failed to allocate data blocks\n");
246 return -1;
247 }
248
249 info.abm_slices = 1;
250 info.ino_slices = 1;
251
252 info.vslice_count = info.abm_slices + info.ino_slices + info.dat_slices +
253 info.journal_slices + 1;
254
255 info.inode_count = static_cast<uint32_t>(info.ino_slices * info.slice_size
256 / kBlobfsInodeSize);
257
258 info.data_block_count = static_cast<uint32_t>(info.dat_slices * info.slice_size
259 / kBlobfsBlockSize);
260 info.journal_block_count = static_cast<uint32_t>(info.journal_slices * info.slice_size
261 / kBlobfsBlockSize);
262 }
263 #endif
264
265 FS_TRACE_DEBUG("Blobfs Mkfs\n");
266 FS_TRACE_DEBUG("Disk size : %" PRIu64 "\n", block_count * kBlobfsBlockSize);
267 FS_TRACE_DEBUG("Block Size : %u\n", kBlobfsBlockSize);
268 FS_TRACE_DEBUG("Block Count: %" PRIu64 "\n", TotalBlocks(info));
269 FS_TRACE_DEBUG("Inode Count: %" PRIu64 "\n", inodes);
270 FS_TRACE_DEBUG("FVM-aware: %s\n", (info.flags & kBlobFlagFVM) ? "YES" : "NO");
271
272 if (info.data_block_count < kMinimumDataBlocks) {
273 FS_TRACE_ERROR("blobfs mkfs: Not enough space for minimum data partition\n");
274 return -1;
275 }
276
277 if (info.journal_block_count < kMinimumJournalBlocks) {
278 FS_TRACE_ERROR("blobfs mkfs: Not enough space for minimum journal partition\n");
279 return -1;
280 }
281
282 // Determine the number of blocks necessary for the block map and node map.
283 uint64_t bbm_blocks = BlockMapBlocks(info);
284 uint64_t nbm_blocks = NodeMapBlocks(info);
285
286 RawBitmap abm;
287 if (abm.Reset(bbm_blocks * kBlobfsBlockBits)) {
288 FS_TRACE_ERROR("Couldn't allocate blobfs block map\n");
289 return -1;
290 } else if (abm.Shrink(info.data_block_count)) {
291 FS_TRACE_ERROR("Couldn't shrink blobfs block map\n");
292 return -1;
293 }
294
295 // Reserve first |kStartBlockMinimum| data blocks
296 abm.Set(0, kStartBlockMinimum);
297 info.alloc_block_count += kStartBlockMinimum;
298
299 if (info.inode_count * sizeof(Inode) != nbm_blocks * kBlobfsBlockSize) {
300 FS_TRACE_ERROR("For simplicity, inode table block must be entirely filled\n");
301 return -1;
302 }
303
304 // All in-memory structures have been created successfully. Dump everything to disk.
305 zx_status_t status;
306 char block[kBlobfsBlockSize];
307 memset(block, 0, sizeof(block));
308
309 JournalInfo* journal_info = reinterpret_cast<JournalInfo*>(block);
310 journal_info->magic = kJournalMagic;
311 if ((status = writeblk(fd, JournalStartBlock(info), block)) != ZX_OK) {
312 FS_TRACE_ERROR("Failed to write journal block\n");
313 return status;
314 }
315
316 // write the root block to disk
317 memset(block, 0, sizeof(journal_info));
318 memcpy(block, &info, sizeof(info));
319 if ((status = writeblk(fd, 0, block)) != ZX_OK) {
320 FS_TRACE_ERROR("Failed to write root block\n");
321 return status;
322 }
323
324 // write allocation bitmap to disk
325 for (uint64_t n = 0; n < bbm_blocks; n++) {
326 void* bmdata = GetRawBitmapData(abm, n);
327 if ((status = writeblk(fd, BlockMapStartBlock(info) + n, bmdata)) < 0) {
328 FS_TRACE_ERROR("Failed to write blockmap block %" PRIu64 "\n", n);
329 return status;
330 }
331 }
332
333 // write node map to disk
334 for (uint64_t n = 0; n < nbm_blocks; n++) {
335 memset(block, 0, sizeof(block));
336 if (writeblk(fd, NodeMapStartBlock(info) + n, block)) {
337 FS_TRACE_ERROR("blobfs: failed writing inode map\n");
338 return ZX_ERR_IO;
339 }
340 }
341
342 FS_TRACE_DEBUG("BLOBFS: mkfs success\n");
343 return 0;
344 }
345
346 } // namespace blobfs
347