1 // SPDX-License-Identifier: GPL-2.0+
2 #include <common.h>
3 #include <fs_internal.h>
4 #include <log.h>
5 #include <uuid.h>
6 #include <memalign.h>
7 #include "kernel-shared/btrfs_tree.h"
8 #include "common/rbtree-utils.h"
9 #include "disk-io.h"
10 #include "ctree.h"
11 #include "btrfs.h"
12 #include "volumes.h"
13 #include "extent-io.h"
14 #include "crypto/hash.h"
15 
16 /* specified errno for check_tree_block */
17 #define BTRFS_BAD_BYTENR		(-1)
18 #define BTRFS_BAD_FSID			(-2)
19 #define BTRFS_BAD_LEVEL			(-3)
20 #define BTRFS_BAD_NRITEMS		(-4)
21 
22 /* Calculate max possible nritems for a leaf/node */
max_nritems(u8 level,u32 nodesize)23 static u32 max_nritems(u8 level, u32 nodesize)
24 {
25 
26 	if (level == 0)
27 		return ((nodesize - sizeof(struct btrfs_header)) /
28 			sizeof(struct btrfs_item));
29 	return ((nodesize - sizeof(struct btrfs_header)) /
30 		sizeof(struct btrfs_key_ptr));
31 }
32 
check_tree_block(struct btrfs_fs_info * fs_info,struct extent_buffer * buf)33 static int check_tree_block(struct btrfs_fs_info *fs_info,
34 			    struct extent_buffer *buf)
35 {
36 
37 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
38 	u32 nodesize = fs_info->nodesize;
39 	bool fsid_match = false;
40 	int ret = BTRFS_BAD_FSID;
41 
42 	if (buf->start != btrfs_header_bytenr(buf))
43 		return BTRFS_BAD_BYTENR;
44 	if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
45 		return BTRFS_BAD_LEVEL;
46 	if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
47 						    nodesize))
48 		return BTRFS_BAD_NRITEMS;
49 
50 	/* Only leaf can be empty */
51 	if (btrfs_header_nritems(buf) == 0 &&
52 	    btrfs_header_level(buf) != 0)
53 		return BTRFS_BAD_NRITEMS;
54 
55 	while (fs_devices) {
56 		/*
57 		 * Checking the incompat flag is only valid for the current
58 		 * fs. For seed devices it's forbidden to have their uuid
59 		 * changed so reading ->fsid in this case is fine
60 		 */
61 		if (fs_devices == fs_info->fs_devices &&
62 		    btrfs_fs_incompat(fs_info, METADATA_UUID))
63 			fsid_match = !memcmp_extent_buffer(buf,
64 						   fs_devices->metadata_uuid,
65 						   btrfs_header_fsid(),
66 						   BTRFS_FSID_SIZE);
67 		else
68 			fsid_match = !memcmp_extent_buffer(buf,
69 						    fs_devices->fsid,
70 						    btrfs_header_fsid(),
71 						    BTRFS_FSID_SIZE);
72 
73 
74 		if (fsid_match) {
75 			ret = 0;
76 			break;
77 		}
78 		fs_devices = fs_devices->seed;
79 	}
80 	return ret;
81 }
82 
print_tree_block_error(struct btrfs_fs_info * fs_info,struct extent_buffer * eb,int err)83 static void print_tree_block_error(struct btrfs_fs_info *fs_info,
84 				struct extent_buffer *eb,
85 				int err)
86 {
87 	char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
88 	char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
89 	u8 buf[BTRFS_UUID_SIZE];
90 
91 	if (!err)
92 		return;
93 
94 	fprintf(stderr, "bad tree block %llu, ", eb->start);
95 	switch (err) {
96 	case BTRFS_BAD_FSID:
97 		read_extent_buffer(eb, buf, btrfs_header_fsid(),
98 				   BTRFS_UUID_SIZE);
99 		uuid_unparse(buf, found_uuid);
100 		uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
101 		fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
102 			fs_uuid, found_uuid);
103 		break;
104 	case BTRFS_BAD_BYTENR:
105 		fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
106 			eb->start, btrfs_header_bytenr(eb));
107 		break;
108 	case BTRFS_BAD_LEVEL:
109 		fprintf(stderr, "bad level, %u > %d\n",
110 			btrfs_header_level(eb), BTRFS_MAX_LEVEL);
111 		break;
112 	case BTRFS_BAD_NRITEMS:
113 		fprintf(stderr, "invalid nr_items: %u\n",
114 			btrfs_header_nritems(eb));
115 		break;
116 	}
117 }
118 
btrfs_csum_data(u16 csum_type,const u8 * data,u8 * out,size_t len)119 int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
120 {
121 	memset(out, 0, BTRFS_CSUM_SIZE);
122 
123 	switch (csum_type) {
124 	case BTRFS_CSUM_TYPE_CRC32:
125 		return hash_crc32c(data, len, out);
126 	case BTRFS_CSUM_TYPE_XXHASH:
127 		return hash_xxhash(data, len, out);
128 	case BTRFS_CSUM_TYPE_SHA256:
129 		return hash_sha256(data, len, out);
130 	case BTRFS_CSUM_TYPE_BLAKE2:
131 		return hash_blake2(data, len, out);
132 	default:
133 		printf("Unknown csum type %d\n", csum_type);
134 		return -EINVAL;
135 	}
136 }
137 
138 /*
139  * Check if the super is valid:
140  * - nodesize/sectorsize - minimum, maximum, alignment
141  * - tree block starts   - alignment
142  * - number of devices   - something sane
143  * - sys array size      - maximum
144  */
btrfs_check_super(struct btrfs_super_block * sb)145 static int btrfs_check_super(struct btrfs_super_block *sb)
146 {
147 	u8 result[BTRFS_CSUM_SIZE];
148 	u16 csum_type;
149 	int csum_size;
150 	u8 *metadata_uuid;
151 
152 	if (btrfs_super_magic(sb) != BTRFS_MAGIC)
153 		return -EIO;
154 
155 	csum_type = btrfs_super_csum_type(sb);
156 	if (csum_type >= btrfs_super_num_csums()) {
157 		error("unsupported checksum algorithm %u", csum_type);
158 		return -EIO;
159 	}
160 	csum_size = btrfs_super_csum_size(sb);
161 
162 	btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
163 			result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
164 
165 	if (memcmp(result, sb->csum, csum_size)) {
166 		error("superblock checksum mismatch");
167 		return -EIO;
168 	}
169 	if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
170 		error("tree_root level too big: %d >= %d",
171 			btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
172 		goto error_out;
173 	}
174 	if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
175 		error("chunk_root level too big: %d >= %d",
176 			btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
177 		goto error_out;
178 	}
179 	if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
180 		error("log_root level too big: %d >= %d",
181 			btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
182 		goto error_out;
183 	}
184 
185 	if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
186 		error("tree_root block unaligned: %llu", btrfs_super_root(sb));
187 		goto error_out;
188 	}
189 	if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
190 		error("chunk_root block unaligned: %llu",
191 			btrfs_super_chunk_root(sb));
192 		goto error_out;
193 	}
194 	if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
195 		error("log_root block unaligned: %llu",
196 			btrfs_super_log_root(sb));
197 		goto error_out;
198 	}
199 	if (btrfs_super_nodesize(sb) < 4096) {
200 		error("nodesize too small: %u < 4096",
201 			btrfs_super_nodesize(sb));
202 		goto error_out;
203 	}
204 	if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
205 		error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
206 		goto error_out;
207 	}
208 	if (btrfs_super_sectorsize(sb) < 4096) {
209 		error("sectorsize too small: %u < 4096",
210 			btrfs_super_sectorsize(sb));
211 		goto error_out;
212 	}
213 	if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
214 		error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
215 		goto error_out;
216 	}
217 	if (btrfs_super_total_bytes(sb) == 0) {
218 		error("invalid total_bytes 0");
219 		goto error_out;
220 	}
221 	if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
222 		error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
223 		goto error_out;
224 	}
225 	if ((btrfs_super_stripesize(sb) != 4096)
226 		&& (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
227 		error("invalid stripesize %u", btrfs_super_stripesize(sb));
228 		goto error_out;
229 	}
230 
231 	if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
232 		metadata_uuid = sb->metadata_uuid;
233 	else
234 		metadata_uuid = sb->fsid;
235 
236 	if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
237 		char fsid[BTRFS_UUID_UNPARSED_SIZE];
238 		char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
239 
240 		uuid_unparse(sb->metadata_uuid, fsid);
241 		uuid_unparse(sb->dev_item.fsid, dev_fsid);
242 		error("dev_item UUID does not match fsid: %s != %s",
243 			dev_fsid, fsid);
244 		goto error_out;
245 	}
246 
247 	/*
248 	 * Hint to catch really bogus numbers, bitflips or so
249 	 */
250 	if (btrfs_super_num_devices(sb) > (1UL << 31)) {
251 		error("suspicious number of devices: %llu",
252 			btrfs_super_num_devices(sb));
253 	}
254 
255 	if (btrfs_super_num_devices(sb) == 0) {
256 		error("number of devices is 0");
257 		goto error_out;
258 	}
259 
260 	/*
261 	 * Obvious sys_chunk_array corruptions, it must hold at least one key
262 	 * and one chunk
263 	 */
264 	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
265 		error("system chunk array too big %u > %u",
266 		      btrfs_super_sys_array_size(sb),
267 		      BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
268 		goto error_out;
269 	}
270 	if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
271 			+ sizeof(struct btrfs_chunk)) {
272 		error("system chunk array too small %u < %zu",
273 		      btrfs_super_sys_array_size(sb),
274 		      sizeof(struct btrfs_disk_key) +
275 		      sizeof(struct btrfs_chunk));
276 		goto error_out;
277 	}
278 
279 	return 0;
280 
281 error_out:
282 	error("superblock checksum matches but it has invalid members");
283 	return -EIO;
284 }
285 
286 /*
287  * btrfs_read_dev_super - read a valid primary superblock from a block device
288  * @desc,@part:	file descriptor of the device
289  * @sb:		buffer where the superblock is going to be read in
290  *
291  * Unlike the btrfs-progs/kernel version, here we ony care about the first
292  * super block, thus it's much simpler.
293  */
btrfs_read_dev_super(struct blk_desc * desc,struct disk_partition * part,struct btrfs_super_block * sb)294 int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
295 			 struct btrfs_super_block *sb)
296 {
297 	ALLOC_CACHE_ALIGN_BUFFER(char, tmp, BTRFS_SUPER_INFO_SIZE);
298 	struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
299 	int ret;
300 
301 	ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
302 			      BTRFS_SUPER_INFO_OFFSET);
303 	if (ret < BTRFS_SUPER_INFO_SIZE)
304 		return -EIO;
305 
306 	if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
307 		return -EIO;
308 
309 	if (btrfs_check_super(buf))
310 		return -EIO;
311 
312 	memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
313 	return 0;
314 }
315 
__csum_tree_block_size(struct extent_buffer * buf,u16 csum_size,int verify,int silent,u16 csum_type)316 static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
317 				  int verify, int silent, u16 csum_type)
318 {
319 	u8 result[BTRFS_CSUM_SIZE];
320 	u32 len;
321 
322 	len = buf->len - BTRFS_CSUM_SIZE;
323 	btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
324 			result, len);
325 
326 	if (verify) {
327 		if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
328 			/* FIXME: format */
329 			if (!silent)
330 				printk("checksum verify failed on %llu found %08X wanted %08X\n",
331 				       (unsigned long long)buf->start,
332 				       result[0],
333 				       buf->data[0]);
334 			return 1;
335 		}
336 	} else {
337 		write_extent_buffer(buf, result, 0, csum_size);
338 	}
339 	return 0;
340 }
341 
csum_tree_block_size(struct extent_buffer * buf,u16 csum_size,int verify,u16 csum_type)342 int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
343 			 u16 csum_type)
344 {
345 	return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
346 }
347 
csum_tree_block(struct btrfs_fs_info * fs_info,struct extent_buffer * buf,int verify)348 static int csum_tree_block(struct btrfs_fs_info *fs_info,
349 			   struct extent_buffer *buf, int verify)
350 {
351 	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
352 	u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
353 
354 	return csum_tree_block_size(buf, csum_size, verify, csum_type);
355 }
356 
btrfs_find_tree_block(struct btrfs_fs_info * fs_info,u64 bytenr,u32 blocksize)357 struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
358 					    u64 bytenr, u32 blocksize)
359 {
360 	return find_extent_buffer(&fs_info->extent_cache,
361 				  bytenr, blocksize);
362 }
363 
btrfs_find_create_tree_block(struct btrfs_fs_info * fs_info,u64 bytenr)364 struct extent_buffer* btrfs_find_create_tree_block(
365 		struct btrfs_fs_info *fs_info, u64 bytenr)
366 {
367 	return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
368 }
369 
verify_parent_transid(struct extent_io_tree * io_tree,struct extent_buffer * eb,u64 parent_transid,int ignore)370 static int verify_parent_transid(struct extent_io_tree *io_tree,
371 				 struct extent_buffer *eb, u64 parent_transid,
372 				 int ignore)
373 {
374 	int ret;
375 
376 	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
377 		return 0;
378 
379 	if (extent_buffer_uptodate(eb) &&
380 	    btrfs_header_generation(eb) == parent_transid) {
381 		ret = 0;
382 		goto out;
383 	}
384 	printk("parent transid verify failed on %llu wanted %llu found %llu\n",
385 	       (unsigned long long)eb->start,
386 	       (unsigned long long)parent_transid,
387 	       (unsigned long long)btrfs_header_generation(eb));
388 	if (ignore) {
389 		eb->flags |= EXTENT_BAD_TRANSID;
390 		printk("Ignoring transid failure\n");
391 		return 0;
392 	}
393 
394 	ret = 1;
395 out:
396 	clear_extent_buffer_uptodate(eb);
397 	return ret;
398 
399 }
400 
read_whole_eb(struct btrfs_fs_info * info,struct extent_buffer * eb,int mirror)401 int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
402 {
403 	unsigned long offset = 0;
404 	struct btrfs_multi_bio *multi = NULL;
405 	struct btrfs_device *device;
406 	int ret = 0;
407 	u64 read_len;
408 	unsigned long bytes_left = eb->len;
409 
410 	while (bytes_left) {
411 		read_len = bytes_left;
412 		device = NULL;
413 
414 		ret = btrfs_map_block(info, READ, eb->start + offset,
415 				      &read_len, &multi, mirror, NULL);
416 		if (ret) {
417 			printk("Couldn't map the block %Lu\n", eb->start + offset);
418 			kfree(multi);
419 			return -EIO;
420 		}
421 		device = multi->stripes[0].dev;
422 
423 		if (!device->desc || !device->part) {
424 			kfree(multi);
425 			return -EIO;
426 		}
427 
428 		if (read_len > bytes_left)
429 			read_len = bytes_left;
430 
431 		ret = read_extent_from_disk(device->desc, device->part,
432 					    multi->stripes[0].physical, eb,
433 					    offset, read_len);
434 		kfree(multi);
435 		multi = NULL;
436 
437 		if (ret)
438 			return -EIO;
439 		offset += read_len;
440 		bytes_left -= read_len;
441 	}
442 	return 0;
443 }
444 
read_tree_block(struct btrfs_fs_info * fs_info,u64 bytenr,u64 parent_transid)445 struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
446 		u64 parent_transid)
447 {
448 	int ret;
449 	struct extent_buffer *eb;
450 	u64 best_transid = 0;
451 	u32 sectorsize = fs_info->sectorsize;
452 	int mirror_num = 1;
453 	int good_mirror = 0;
454 	int candidate_mirror = 0;
455 	int num_copies;
456 	int ignore = 0;
457 
458 	/*
459 	 * Don't even try to create tree block for unaligned tree block
460 	 * bytenr.
461 	 * Such unaligned tree block will free overlapping extent buffer,
462 	 * causing use-after-free bugs for fuzzed images.
463 	 */
464 	if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
465 		error("tree block bytenr %llu is not aligned to sectorsize %u",
466 		      bytenr, sectorsize);
467 		return ERR_PTR(-EIO);
468 	}
469 
470 	eb = btrfs_find_create_tree_block(fs_info, bytenr);
471 	if (!eb)
472 		return ERR_PTR(-ENOMEM);
473 
474 	if (btrfs_buffer_uptodate(eb, parent_transid))
475 		return eb;
476 
477 	num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
478 	while (1) {
479 		ret = read_whole_eb(fs_info, eb, mirror_num);
480 		if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
481 		    check_tree_block(fs_info, eb) == 0 &&
482 		    verify_parent_transid(&fs_info->extent_cache, eb,
483 					  parent_transid, ignore) == 0) {
484 			/*
485 			 * check_tree_block() is less strict to allow btrfs
486 			 * check to get raw eb with bad key order and fix it.
487 			 * But we still need to try to get a good copy if
488 			 * possible, or bad key order can go into tools like
489 			 * btrfs ins dump-tree.
490 			 */
491 			if (btrfs_header_level(eb))
492 				ret = btrfs_check_node(fs_info, NULL, eb);
493 			else
494 				ret = btrfs_check_leaf(fs_info, NULL, eb);
495 			if (!ret || candidate_mirror == mirror_num) {
496 				btrfs_set_buffer_uptodate(eb);
497 				return eb;
498 			}
499 			if (candidate_mirror <= 0)
500 				candidate_mirror = mirror_num;
501 		}
502 		if (ignore) {
503 			if (candidate_mirror > 0) {
504 				mirror_num = candidate_mirror;
505 				continue;
506 			}
507 			if (check_tree_block(fs_info, eb))
508 				print_tree_block_error(fs_info, eb,
509 						check_tree_block(fs_info, eb));
510 			else
511 				fprintf(stderr, "Csum didn't match\n");
512 			ret = -EIO;
513 			break;
514 		}
515 		if (num_copies == 1) {
516 			ignore = 1;
517 			continue;
518 		}
519 		if (btrfs_header_generation(eb) > best_transid) {
520 			best_transid = btrfs_header_generation(eb);
521 			good_mirror = mirror_num;
522 		}
523 		mirror_num++;
524 		if (mirror_num > num_copies) {
525 			if (candidate_mirror > 0)
526 				mirror_num = candidate_mirror;
527 			else
528 				mirror_num = good_mirror;
529 			ignore = 1;
530 			continue;
531 		}
532 	}
533 	/*
534 	 * We failed to read this tree block, it be should deleted right now
535 	 * to avoid stale cache populate the cache.
536 	 */
537 	free_extent_buffer(eb);
538 	return ERR_PTR(ret);
539 }
540 
read_extent_data(struct btrfs_fs_info * fs_info,char * data,u64 logical,u64 * len,int mirror)541 int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
542 		     u64 *len, int mirror)
543 {
544 	u64 orig_len = *len;
545 	u64 cur = logical;
546 	struct btrfs_multi_bio *multi = NULL;
547 	struct btrfs_device *device;
548 	int ret = 0;
549 
550 	while (cur < logical + orig_len) {
551 		u64 cur_len = logical + orig_len - cur;
552 
553 		ret = btrfs_map_block(fs_info, READ, cur, &cur_len, &multi,
554 				      mirror, NULL);
555 		if (ret) {
556 			error("Couldn't map the block %llu", cur);
557 			goto err;
558 		}
559 		device = multi->stripes[0].dev;
560 		if (!device->desc || !device->part) {
561 			error("devid %llu is missing", device->devid);
562 			ret = -EIO;
563 			goto err;
564 		}
565 		ret = __btrfs_devread(device->desc, device->part,
566 				data + (cur - logical), cur_len,
567 				multi->stripes[0].physical);
568 		if (ret != cur_len) {
569 			error("read failed on devid %llu physical %llu",
570 			      device->devid, multi->stripes[0].physical);
571 			ret = -EIO;
572 			goto err;
573 		}
574 		cur += cur_len;
575 		ret = 0;
576 	}
577 err:
578 	kfree(multi);
579 	return ret;
580 }
581 
btrfs_setup_root(struct btrfs_root * root,struct btrfs_fs_info * fs_info,u64 objectid)582 void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
583 		      u64 objectid)
584 {
585 	root->node = NULL;
586 	root->track_dirty = 0;
587 
588 	root->fs_info = fs_info;
589 	root->objectid = objectid;
590 	root->last_trans = 0;
591 	root->last_inode_alloc = 0;
592 
593 	memset(&root->root_key, 0, sizeof(root->root_key));
594 	memset(&root->root_item, 0, sizeof(root->root_item));
595 	root->root_key.objectid = objectid;
596 }
597 
find_and_setup_root(struct btrfs_root * tree_root,struct btrfs_fs_info * fs_info,u64 objectid,struct btrfs_root * root)598 static int find_and_setup_root(struct btrfs_root *tree_root,
599 			       struct btrfs_fs_info *fs_info,
600 			       u64 objectid, struct btrfs_root *root)
601 {
602 	int ret;
603 	u64 generation;
604 
605 	btrfs_setup_root(root, fs_info, objectid);
606 	ret = btrfs_find_last_root(tree_root, objectid,
607 				   &root->root_item, &root->root_key);
608 	if (ret)
609 		return ret;
610 
611 	generation = btrfs_root_generation(&root->root_item);
612 	root->node = read_tree_block(fs_info,
613 			btrfs_root_bytenr(&root->root_item), generation);
614 	if (!extent_buffer_uptodate(root->node))
615 		return -EIO;
616 
617 	return 0;
618 }
619 
btrfs_free_fs_root(struct btrfs_root * root)620 int btrfs_free_fs_root(struct btrfs_root *root)
621 {
622 	if (root->node)
623 		free_extent_buffer(root->node);
624 	kfree(root);
625 	return 0;
626 }
627 
__free_fs_root(struct rb_node * node)628 static void __free_fs_root(struct rb_node *node)
629 {
630 	struct btrfs_root *root;
631 
632 	root = container_of(node, struct btrfs_root, rb_node);
633 	btrfs_free_fs_root(root);
634 }
635 
636 FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
637 
btrfs_read_fs_root_no_cache(struct btrfs_fs_info * fs_info,struct btrfs_key * location)638 struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
639 					       struct btrfs_key *location)
640 {
641 	struct btrfs_root *root;
642 	struct btrfs_root *tree_root = fs_info->tree_root;
643 	struct btrfs_path *path;
644 	struct extent_buffer *l;
645 	u64 generation;
646 	int ret = 0;
647 
648 	root = calloc(1, sizeof(*root));
649 	if (!root)
650 		return ERR_PTR(-ENOMEM);
651 	if (location->offset == (u64)-1) {
652 		ret = find_and_setup_root(tree_root, fs_info,
653 					  location->objectid, root);
654 		if (ret) {
655 			free(root);
656 			return ERR_PTR(ret);
657 		}
658 		goto insert;
659 	}
660 
661 	btrfs_setup_root(root, fs_info,
662 			 location->objectid);
663 
664 	path = btrfs_alloc_path();
665 	if (!path) {
666 		free(root);
667 		return ERR_PTR(-ENOMEM);
668 	}
669 
670 	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
671 	if (ret != 0) {
672 		if (ret > 0)
673 			ret = -ENOENT;
674 		goto out;
675 	}
676 	l = path->nodes[0];
677 	read_extent_buffer(l, &root->root_item,
678 	       btrfs_item_ptr_offset(l, path->slots[0]),
679 	       sizeof(root->root_item));
680 	memcpy(&root->root_key, location, sizeof(*location));
681 
682 	/* If this root is already an orphan, no need to read */
683 	if (btrfs_root_refs(&root->root_item) == 0) {
684 		ret = -ENOENT;
685 		goto out;
686 	}
687 	ret = 0;
688 out:
689 	btrfs_free_path(path);
690 	if (ret) {
691 		free(root);
692 		return ERR_PTR(ret);
693 	}
694 	generation = btrfs_root_generation(&root->root_item);
695 	root->node = read_tree_block(fs_info,
696 			btrfs_root_bytenr(&root->root_item), generation);
697 	if (!extent_buffer_uptodate(root->node)) {
698 		free(root);
699 		return ERR_PTR(-EIO);
700 	}
701 insert:
702 	root->ref_cows = 1;
703 	return root;
704 }
705 
btrfs_fs_roots_compare_objectids(struct rb_node * node,void * data)706 static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
707 					    void *data)
708 {
709 	u64 objectid = *((u64 *)data);
710 	struct btrfs_root *root;
711 
712 	root = rb_entry(node, struct btrfs_root, rb_node);
713 	if (objectid > root->objectid)
714 		return 1;
715 	else if (objectid < root->objectid)
716 		return -1;
717 	else
718 		return 0;
719 }
720 
btrfs_fs_roots_compare_roots(struct rb_node * node1,struct rb_node * node2)721 int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
722 {
723 	struct btrfs_root *root;
724 
725 	root = rb_entry(node2, struct btrfs_root, rb_node);
726 	return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
727 }
728 
btrfs_read_fs_root(struct btrfs_fs_info * fs_info,struct btrfs_key * location)729 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
730 				      struct btrfs_key *location)
731 {
732 	struct btrfs_root *root;
733 	struct rb_node *node;
734 	int ret;
735 	u64 objectid = location->objectid;
736 
737 	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
738 		return fs_info->tree_root;
739 	if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
740 		return fs_info->chunk_root;
741 	if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
742 		return fs_info->csum_root;
743 	BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID);
744 
745 	node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
746 			 btrfs_fs_roots_compare_objectids, NULL);
747 	if (node)
748 		return container_of(node, struct btrfs_root, rb_node);
749 
750 	root = btrfs_read_fs_root_no_cache(fs_info, location);
751 	if (IS_ERR(root))
752 		return root;
753 
754 	ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
755 			btrfs_fs_roots_compare_roots);
756 	BUG_ON(ret);
757 	return root;
758 }
759 
btrfs_free_fs_info(struct btrfs_fs_info * fs_info)760 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
761 {
762 	free(fs_info->tree_root);
763 	free(fs_info->chunk_root);
764 	free(fs_info->csum_root);
765 	free(fs_info->super_copy);
766 	free(fs_info);
767 }
768 
btrfs_new_fs_info(void)769 struct btrfs_fs_info *btrfs_new_fs_info(void)
770 {
771 	struct btrfs_fs_info *fs_info;
772 
773 	fs_info = calloc(1, sizeof(struct btrfs_fs_info));
774 	if (!fs_info)
775 		return NULL;
776 
777 	fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
778 	fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
779 	fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
780 	fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
781 
782 	if (!fs_info->tree_root || !fs_info->chunk_root ||
783 	    !fs_info->csum_root || !fs_info->super_copy)
784 		goto free_all;
785 
786 	extent_io_tree_init(&fs_info->extent_cache);
787 
788 	fs_info->fs_root_tree = RB_ROOT;
789 	cache_tree_init(&fs_info->mapping_tree.cache_tree);
790 
791 	return fs_info;
792 free_all:
793 	btrfs_free_fs_info(fs_info);
794 	return NULL;
795 }
796 
setup_root_or_create_block(struct btrfs_fs_info * fs_info,struct btrfs_root * info_root,u64 objectid,char * str)797 static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
798 				      struct btrfs_root *info_root,
799 				      u64 objectid, char *str)
800 {
801 	struct btrfs_root *root = fs_info->tree_root;
802 	int ret;
803 
804 	ret = find_and_setup_root(root, fs_info, objectid, info_root);
805 	if (ret) {
806 		error("could not setup %s tree", str);
807 		return -EIO;
808 	}
809 
810 	return 0;
811 }
812 
get_default_subvolume(struct btrfs_fs_info * fs_info,struct btrfs_key * key_ret)813 static int get_default_subvolume(struct btrfs_fs_info *fs_info,
814 				 struct btrfs_key *key_ret)
815 {
816 	struct btrfs_root *root = fs_info->tree_root;
817 	struct btrfs_dir_item *dir_item;
818 	struct btrfs_path path;
819 	int ret = 0;
820 
821 	btrfs_init_path(&path);
822 
823 	dir_item = btrfs_lookup_dir_item(NULL, root, &path,
824 					 BTRFS_ROOT_TREE_DIR_OBJECTID,
825 					 "default", 7, 0);
826 	if (IS_ERR(dir_item)) {
827 		ret = PTR_ERR(dir_item);
828 		goto out;
829 	}
830 
831 	btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
832 out:
833 	btrfs_release_path(&path);
834 	return ret;
835 }
836 
btrfs_setup_all_roots(struct btrfs_fs_info * fs_info)837 int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
838 {
839 	struct btrfs_super_block *sb = fs_info->super_copy;
840 	struct btrfs_root *root;
841 	struct btrfs_key key;
842 	u64 root_tree_bytenr;
843 	u64 generation;
844 	int ret;
845 
846 	root = fs_info->tree_root;
847 	btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
848 	generation = btrfs_super_generation(sb);
849 
850 	root_tree_bytenr = btrfs_super_root(sb);
851 
852 	root->node = read_tree_block(fs_info, root_tree_bytenr, generation);
853 	if (!extent_buffer_uptodate(root->node)) {
854 		fprintf(stderr, "Couldn't read tree root\n");
855 		return -EIO;
856 	}
857 
858 	ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
859 					 BTRFS_CSUM_TREE_OBJECTID, "csum");
860 	if (ret)
861 		return ret;
862 	fs_info->csum_root->track_dirty = 1;
863 
864 	fs_info->last_trans_committed = generation;
865 
866 	ret = get_default_subvolume(fs_info, &key);
867 	if (ret) {
868 		/*
869 		 * The default dir item isn't there. Linux kernel behaviour is
870 		 * to silently use the top-level subvolume in this case.
871 		 */
872 		key.objectid = BTRFS_FS_TREE_OBJECTID;
873 		key.type = BTRFS_ROOT_ITEM_KEY;
874 		key.offset = (u64)-1;
875 	}
876 
877 	fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
878 
879 	if (IS_ERR(fs_info->fs_root))
880 		return -EIO;
881 	return 0;
882 }
883 
btrfs_release_all_roots(struct btrfs_fs_info * fs_info)884 void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
885 {
886 	if (fs_info->csum_root)
887 		free_extent_buffer(fs_info->csum_root->node);
888 	if (fs_info->tree_root)
889 		free_extent_buffer(fs_info->tree_root->node);
890 	if (fs_info->chunk_root)
891 		free_extent_buffer(fs_info->chunk_root->node);
892 }
893 
free_map_lookup(struct cache_extent * ce)894 static void free_map_lookup(struct cache_extent *ce)
895 {
896 	struct map_lookup *map;
897 
898 	map = container_of(ce, struct map_lookup, ce);
899 	kfree(map);
900 }
901 
902 FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
903 
btrfs_cleanup_all_caches(struct btrfs_fs_info * fs_info)904 void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
905 {
906 	free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
907 	extent_io_tree_cleanup(&fs_info->extent_cache);
908 }
909 
btrfs_scan_fs_devices(struct blk_desc * desc,struct disk_partition * part,struct btrfs_fs_devices ** fs_devices)910 static int btrfs_scan_fs_devices(struct blk_desc *desc,
911 				 struct disk_partition *part,
912 				 struct btrfs_fs_devices **fs_devices)
913 {
914 	u64 total_devs;
915 	int ret;
916 
917 	if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
918 		     desc->blksz) > (part->size << desc->log2blksz)) {
919 		log_debug("superblock end %u is larger than device size " LBAFU,
920 			  BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
921 			  part->size << desc->log2blksz);
922 		return -EINVAL;
923 	}
924 
925 	ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
926 	if (ret) {
927 		/*
928 		 * Avoid showing this when probing for a possible Btrfs
929 		 *
930 		 * fprintf(stderr, "No valid Btrfs found\n");
931 		 */
932 		return ret;
933 	}
934 	return 0;
935 }
936 
btrfs_check_fs_compatibility(struct btrfs_super_block * sb)937 int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
938 {
939 	u64 features;
940 
941 	features = btrfs_super_incompat_flags(sb) &
942 		   ~BTRFS_FEATURE_INCOMPAT_SUPP;
943 	if (features) {
944 		printk("couldn't open because of unsupported "
945 		       "option features (%llx).\n",
946 		       (unsigned long long)features);
947 		return -ENOTSUPP;
948 	}
949 
950 	features = btrfs_super_incompat_flags(sb);
951 	if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
952 		features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
953 		btrfs_set_super_incompat_flags(sb, features);
954 	}
955 
956 	return 0;
957 }
958 
btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info * fs_info)959 static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
960 {
961 	struct btrfs_super_block *sb = fs_info->super_copy;
962 	u64 chunk_root_bytenr;
963 	u64 generation;
964 	int ret;
965 
966 	btrfs_setup_root(fs_info->chunk_root, fs_info,
967 			BTRFS_CHUNK_TREE_OBJECTID);
968 
969 	ret = btrfs_read_sys_array(fs_info);
970 	if (ret)
971 		return ret;
972 
973 	generation = btrfs_super_chunk_root_generation(sb);
974 	chunk_root_bytenr = btrfs_super_chunk_root(sb);
975 
976 	fs_info->chunk_root->node = read_tree_block(fs_info,
977 						    chunk_root_bytenr,
978 						    generation);
979 	if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
980 		error("cannot read chunk root");
981 		return -EIO;
982 	}
983 
984 	ret = btrfs_read_chunk_tree(fs_info);
985 	if (ret) {
986 		fprintf(stderr, "Couldn't read chunk tree\n");
987 		return ret;
988 	}
989 	return 0;
990 }
991 
open_ctree_fs_info(struct blk_desc * desc,struct disk_partition * part)992 struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
993 					 struct disk_partition *part)
994 {
995 	struct btrfs_fs_info *fs_info;
996 	struct btrfs_super_block *disk_super;
997 	struct btrfs_fs_devices *fs_devices = NULL;
998 	struct extent_buffer *eb;
999 	int ret;
1000 
1001 	fs_info = btrfs_new_fs_info();
1002 	if (!fs_info) {
1003 		fprintf(stderr, "Failed to allocate memory for fs_info\n");
1004 		return NULL;
1005 	}
1006 
1007 	ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
1008 	if (ret)
1009 		goto out;
1010 
1011 	fs_info->fs_devices = fs_devices;
1012 
1013 	ret = btrfs_open_devices(fs_devices);
1014 	if (ret)
1015 		goto out;
1016 
1017 	disk_super = fs_info->super_copy;
1018 	ret = btrfs_read_dev_super(desc, part, disk_super);
1019 	if (ret) {
1020 		debug("No valid btrfs found\n");
1021 		goto out_devices;
1022 	}
1023 
1024 	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
1025 		fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
1026 		goto out_devices;
1027 	}
1028 
1029 	ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
1030 	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
1031 		ASSERT(!memcmp(disk_super->metadata_uuid,
1032 			       fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
1033 
1034 	fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
1035 	fs_info->nodesize = btrfs_super_nodesize(disk_super);
1036 	fs_info->stripesize = btrfs_super_stripesize(disk_super);
1037 
1038 	ret = btrfs_check_fs_compatibility(fs_info->super_copy);
1039 	if (ret)
1040 		goto out_devices;
1041 
1042 	ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
1043 	if (ret)
1044 		goto out_chunk;
1045 
1046 	/* Chunk tree root is unable to read, return directly */
1047 	if (!fs_info->chunk_root)
1048 		return fs_info;
1049 
1050 	eb = fs_info->chunk_root->node;
1051 	read_extent_buffer(eb, fs_info->chunk_tree_uuid,
1052 			   btrfs_header_chunk_tree_uuid(eb),
1053 			   BTRFS_UUID_SIZE);
1054 
1055 	ret = btrfs_setup_all_roots(fs_info);
1056 	if (ret)
1057 		goto out_chunk;
1058 
1059 	return fs_info;
1060 
1061 out_chunk:
1062 	btrfs_release_all_roots(fs_info);
1063 	btrfs_cleanup_all_caches(fs_info);
1064 out_devices:
1065 	btrfs_close_devices(fs_devices);
1066 out:
1067 	btrfs_free_fs_info(fs_info);
1068 	return NULL;
1069 }
1070 
close_ctree_fs_info(struct btrfs_fs_info * fs_info)1071 int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
1072 {
1073 	int ret;
1074 
1075 	free_fs_roots_tree(&fs_info->fs_root_tree);
1076 
1077 	btrfs_release_all_roots(fs_info);
1078 	ret = btrfs_close_devices(fs_info->fs_devices);
1079 	btrfs_cleanup_all_caches(fs_info);
1080 	btrfs_free_fs_info(fs_info);
1081 	return ret;
1082 }
1083 
btrfs_buffer_uptodate(struct extent_buffer * buf,u64 parent_transid)1084 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
1085 {
1086 	int ret;
1087 
1088 	ret = extent_buffer_uptodate(buf);
1089 	if (!ret)
1090 		return ret;
1091 
1092 	ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
1093 				    parent_transid, 1);
1094 	return !ret;
1095 }
1096 
btrfs_set_buffer_uptodate(struct extent_buffer * eb)1097 int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
1098 {
1099 	return set_extent_buffer_uptodate(eb);
1100 }
1101