// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <fcntl.h>
#include <inttypes.h>
#include <limits>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

#include <digest/digest.h>
#include <digest/merkle-tree.h>
#include <fs/block-txn.h>
#include <fs/trace.h>

#ifdef __Fuchsia__
#include <fs/fvm.h>
#endif

#include <blobfs/common.h>

using digest::Digest;
using digest::MerkleTree;

namespace blobfs {

// Number of blocks reserved for the Merkle Tree
uint32_t MerkleTreeBlocks(const Inode& blobNode) {
    uint64_t size_merkle = MerkleTree::GetTreeLength(blobNode.blob_size);
    ZX_DEBUG_ASSERT(size_merkle <= std::numeric_limits<uint32_t>::max());
    return fbl::round_up(static_cast<uint32_t>(size_merkle), kBlobfsBlockSize) / kBlobfsBlockSize;
}

// Sanity check the metadata for the blobfs, given a maximum number of
// available blocks.
zx_status_t CheckSuperblock(const Superblock* info, uint64_t max) {
    if ((info->magic0 != kBlobfsMagic0) ||
        (info->magic1 != kBlobfsMagic1)) {
        FS_TRACE_ERROR("blobfs: bad magic\n");
        return ZX_ERR_INVALID_ARGS;
    }
    if (info->version != kBlobfsVersion) {
        FS_TRACE_ERROR("blobfs: FS Version: %08x. Driver version: %08x\n", info->version,
                       kBlobfsVersion);
        return ZX_ERR_INVALID_ARGS;
    }
    if (info->block_size != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: bsz %u unsupported\n", info->block_size);
        return ZX_ERR_INVALID_ARGS;
    }
    if ((info->flags & kBlobFlagFVM) == 0) {
        if (TotalBlocks(*info) > max) {
            FS_TRACE_ERROR("blobfs: too large for device\n");
            return ZX_ERR_INVALID_ARGS;
        }
    } else {
        const size_t blocks_per_slice = info->slice_size / info->block_size;

        size_t abm_blocks_needed = BlockMapBlocks(*info);
        size_t abm_blocks_allocated = info->abm_slices * blocks_per_slice;
        if (abm_blocks_needed > abm_blocks_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for block bitmap\n");
            return ZX_ERR_INVALID_ARGS;
        } else if (abm_blocks_allocated + BlockMapStartBlock(*info) >= NodeMapStartBlock(*info)) {
            FS_TRACE_ERROR("blobfs: Block bitmap collides into node map\n");
            return ZX_ERR_INVALID_ARGS;
        }

        size_t ino_blocks_needed = NodeMapBlocks(*info);
        size_t ino_blocks_allocated = info->ino_slices * blocks_per_slice;
        if (ino_blocks_needed > ino_blocks_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for node map\n");
            return ZX_ERR_INVALID_ARGS;
        } else if (ino_blocks_allocated + NodeMapStartBlock(*info) >= DataStartBlock(*info)) {
            FS_TRACE_ERROR("blobfs: Node bitmap collides into data blocks\n");
            return ZX_ERR_INVALID_ARGS;
        }

        size_t dat_blocks_needed = DataBlocks(*info);
        size_t dat_blocks_allocated = info->dat_slices * blocks_per_slice;
        if (dat_blocks_needed < kStartBlockMinimum) {
            FS_TRACE_ERROR("blobfs: Partition too small; no space left for data blocks\n");
            return ZX_ERR_INVALID_ARGS;
        } else if (dat_blocks_needed > dat_blocks_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for data blocks\n");
            return ZX_ERR_INVALID_ARGS;
        } else if (dat_blocks_allocated + DataStartBlock(*info) >
                   std::numeric_limits<uint32_t>::max()) {
            FS_TRACE_ERROR("blobfs: Data blocks overflow uint32\n");
            return ZX_ERR_INVALID_ARGS;
        }
    }
    if (info->blob_header_next != 0) {
        FS_TRACE_ERROR("blobfs: linked blob headers not yet supported\n");
        return ZX_ERR_INVALID_ARGS;
    }
    return ZX_OK;
}

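// Returns, via |out|, the number of blobfs-sized blocks that fit on the device
// (or image file) backing |fd|.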
zx_status_t GetBlockCount(int fd, uint64_t* out) {
#ifdef __Fuchsia__
    block_info_t info;
    ssize_t r;
    if ((r = ioctl_block_get_info(fd, &info)) < 0) {
        return static_cast<zx_status_t>(r);
    }
    *out = (info.block_size * info.block_count) / kBlobfsBlockSize;
#else
    struct stat s;
    if (fstat(fd, &s) < 0) {
        return ZX_ERR_BAD_STATE;
    }
    *out = s.st_size / kBlobfsBlockSize;
#endif
    return ZX_OK;
}

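// Reads block |bno| from |fd| into |data|, which must hold at least kBlobfsBlockSize bytes.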
zx_status_t readblk(int fd, uint64_t bno, void* data) {
    off_t off = bno * kBlobfsBlockSize;
    if (lseek(fd, off, SEEK_SET) < 0) {
        FS_TRACE_ERROR("blobfs: cannot seek to block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    if (read(fd, data, kBlobfsBlockSize) != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: cannot read block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    return ZX_OK;
}

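// Writes kBlobfsBlockSize bytes from |data| to block |bno| of |fd|.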
zx_status_t writeblk(int fd, uint64_t bno, const void* data) {
    off_t off = bno * kBlobfsBlockSize;
    if (lseek(fd, off, SEEK_SET) < 0) {
        FS_TRACE_ERROR("blobfs: cannot seek to block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    if (write(fd, data, kBlobfsBlockSize) != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: cannot write block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    return ZX_OK;
}

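// Formats the device (or image file) backing |fd| as an empty blobfs spanning
// |block_count| blocks. Returns 0 on success, and a negative value on failure.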
int Mkfs(int fd, uint64_t block_count) {
    uint64_t inodes = kBlobfsDefaultInodeCount;

    Superblock info;
    memset(&info, 0x00, sizeof(info));
    info.magic0 = kBlobfsMagic0;
    info.magic1 = kBlobfsMagic1;
    info.version = kBlobfsVersion;
    info.flags = kBlobFlagClean;
    info.block_size = kBlobfsBlockSize;
    // TODO(planders): Consider modifying the inode count if we are low on space.
    //                 It doesn't make sense to have fewer data blocks than inodes.
    info.inode_count = inodes;
    info.alloc_block_count = 0;
    info.alloc_inode_count = 0;
    info.blob_header_next = 0; // TODO(smklein): Allow chaining

    // Temporarily set the data_block_count to the total block_count so we can estimate the number
    // of pre-data blocks.
    info.data_block_count = block_count;

    // The result of JournalStartBlock(info) is based on the current value of
    // info.data_block_count. As a result, the block bitmap may have slightly more space
    // allocated than is necessary.
    size_t usable_blocks = JournalStartBlock(info) < block_count
                           ? block_count - JournalStartBlock(info)
                           : 0;

    // Split the remaining blocks between the journal and the data region.
    if (usable_blocks >= kDefaultJournalBlocks * 2) {
        // Regular-sized partition, capable of fitting a data region
        // at least as large as the journal. Give all excess blocks
        // to the data region.
        info.journal_block_count = kDefaultJournalBlocks;
        info.data_block_count = usable_blocks - kDefaultJournalBlocks;
    } else if (usable_blocks >= kMinimumDataBlocks + kMinimumJournalBlocks) {
        // On smaller partitions, give both regions the minimum amount of space,
        // and split the remainder. The choice of where to allocate the "remainder"
        // is arbitrary.
        const size_t remainder_blocks = usable_blocks -
                                        (kMinimumDataBlocks + kMinimumJournalBlocks);
        const size_t remainder_for_journal = remainder_blocks / 2;
        const size_t remainder_for_data = remainder_blocks - remainder_for_journal;
        info.journal_block_count = kMinimumJournalBlocks + remainder_for_journal;
        info.data_block_count = kMinimumDataBlocks + remainder_for_data;
    } else {
        // Error: the partition is too small.
        info.journal_block_count = 0;
        info.data_block_count = 0;
    }

#ifdef __Fuchsia__
    fvm_info_t fvm_info;

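    // If the underlying device is FVM-managed, allocate the initial slices for each on-disk
    // region (block map, node map, journal, data) and recompute region sizes in whole slices.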
    if (ioctl_block_fvm_query(fd, &fvm_info) >= 0) {
        info.slice_size = fvm_info.slice_size;
        info.flags |= kBlobFlagFVM;

        if (info.slice_size % kBlobfsBlockSize) {
            FS_TRACE_ERROR("blobfs mkfs: Slice size not multiple of blobfs block\n");
            return -1;
        }

        if (fs::fvm_reset_volume_slices(fd) != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to reset slices\n");
            return -1;
        }

        const size_t kBlocksPerSlice = info.slice_size / kBlobfsBlockSize;

        extend_request_t request;
        request.length = 1;
        request.offset = kFVMBlockMapStart / kBlocksPerSlice;
        if (ioctl_block_fvm_extend(fd, &request) < 0) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate block map\n");
            return -1;
        }

        request.offset = kFVMNodeMapStart / kBlocksPerSlice;
        if (ioctl_block_fvm_extend(fd, &request) < 0) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate node map\n");
            return -1;
        }

        // Allocate the minimum number of journal blocks in FVM.
        request.offset = kFVMJournalStart / kBlocksPerSlice;
        request.length = fbl::round_up(kDefaultJournalBlocks, kBlocksPerSlice) / kBlocksPerSlice;
        info.journal_slices = static_cast<uint32_t>(request.length);
        if (ioctl_block_fvm_extend(fd, &request) < 0) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate journal blocks\n");
            return -1;
        }

        // Allocate the minimum number of data blocks in the FVM.
        request.offset = kFVMDataStart / kBlocksPerSlice;
        request.length = fbl::round_up(kMinimumDataBlocks, kBlocksPerSlice) / kBlocksPerSlice;
        info.dat_slices = static_cast<uint32_t>(request.length);
        if (ioctl_block_fvm_extend(fd, &request) < 0) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate data blocks\n");
            return -1;
        }

        info.abm_slices = 1;
        info.ino_slices = 1;

        info.vslice_count = info.abm_slices + info.ino_slices + info.dat_slices +
                            info.journal_slices + 1;

        info.inode_count = static_cast<uint32_t>(info.ino_slices * info.slice_size
                                                 / kBlobfsInodeSize);

        info.data_block_count = static_cast<uint32_t>(info.dat_slices * info.slice_size
                                                      / kBlobfsBlockSize);
        info.journal_block_count = static_cast<uint32_t>(info.journal_slices * info.slice_size
                                                         / kBlobfsBlockSize);
    }
#endif

    FS_TRACE_DEBUG("Blobfs Mkfs\n");
    FS_TRACE_DEBUG("Disk size  : %" PRIu64 "\n", block_count * kBlobfsBlockSize);
    FS_TRACE_DEBUG("Block Size : %u\n", kBlobfsBlockSize);
    FS_TRACE_DEBUG("Block Count: %" PRIu64 "\n", TotalBlocks(info));
    FS_TRACE_DEBUG("Inode Count: %" PRIu64 "\n", inodes);
    FS_TRACE_DEBUG("FVM-aware: %s\n", (info.flags & kBlobFlagFVM) ? "YES" : "NO");

    if (info.data_block_count < kMinimumDataBlocks) {
        FS_TRACE_ERROR("blobfs mkfs: Not enough space for minimum data partition\n");
        return -1;
    }

    if (info.journal_block_count < kMinimumJournalBlocks) {
        FS_TRACE_ERROR("blobfs mkfs: Not enough space for minimum journal partition\n");
        return -1;
    }

    // Determine the number of blocks necessary for the block map and node map.
    uint64_t bbm_blocks = BlockMapBlocks(info);
    uint64_t nbm_blocks = NodeMapBlocks(info);

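    // Build the in-memory allocation bitmap sized to the block map region, then trim it
    // to the actual number of data blocks.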
    RawBitmap abm;
    if (abm.Reset(bbm_blocks * kBlobfsBlockBits)) {
        FS_TRACE_ERROR("Couldn't allocate blobfs block map\n");
        return -1;
    } else if (abm.Shrink(info.data_block_count)) {
        FS_TRACE_ERROR("Couldn't shrink blobfs block map\n");
        return -1;
    }

    // Reserve first |kStartBlockMinimum| data blocks
    abm.Set(0, kStartBlockMinimum);
    info.alloc_block_count += kStartBlockMinimum;

    if (info.inode_count * sizeof(Inode) != nbm_blocks * kBlobfsBlockSize) {
        FS_TRACE_ERROR("For simplicity, inode table block must be entirely filled\n");
        return -1;
    }

    // All in-memory structures have been created successfully. Dump everything to disk.
    zx_status_t status;
    char block[kBlobfsBlockSize];
    memset(block, 0, sizeof(block));

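    // Seed the journal's info block, which at mkfs time carries only the journal magic.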
    JournalInfo* journal_info = reinterpret_cast<JournalInfo*>(block);
    journal_info->magic = kJournalMagic;
    if ((status = writeblk(fd, JournalStartBlock(info), block)) != ZX_OK) {
        FS_TRACE_ERROR("Failed to write journal block\n");
        return status;
    }

    // Write the root block to disk.
    memset(block, 0, sizeof(block));
    memcpy(block, &info, sizeof(info));
    if ((status = writeblk(fd, 0, block)) != ZX_OK) {
        FS_TRACE_ERROR("Failed to write root block\n");
        return status;
    }

    // Write the allocation bitmap to disk.
    for (uint64_t n = 0; n < bbm_blocks; n++) {
        void* bmdata = GetRawBitmapData(abm, n);
        if ((status = writeblk(fd, BlockMapStartBlock(info) + n, bmdata)) != ZX_OK) {
            FS_TRACE_ERROR("Failed to write blockmap block %" PRIu64 "\n", n);
            return status;
        }
    }

    // Write the node map to disk.
    for (uint64_t n = 0; n < nbm_blocks; n++) {
        memset(block, 0, sizeof(block));
        if (writeblk(fd, NodeMapStartBlock(info) + n, block)) {
            FS_TRACE_ERROR("blobfs: failed writing inode map\n");
            return ZX_ERR_IO;
        }
    }

    FS_TRACE_DEBUG("BLOBFS: mkfs success\n");
    return 0;
}

} // namespace blobfs