1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
2 /*
3 * GRUB -- GRand Unified Bootloader
4 * Copyright (C) 2000, 2001 Free Software Foundation, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <fsimage_grub.h>
21
22 #undef REISERDEBUG
23
24 /* Some parts of this code (mainly the structures and defines) are
25 * from the original reiser fs code, as found in the linux kernel.
26 */
27
28 /* include/asm-i386/types.h */
29 typedef __signed__ char __s8;
30 typedef unsigned char __u8;
31 typedef __signed__ short __s16;
32 typedef unsigned short __u16;
33 typedef __signed__ int __s32;
34 typedef unsigned int __u32;
35 typedef unsigned long long __u64;
36
37 /* linux/posix_type.h */
38 typedef long linux_off_t;
39
40 /* linux/little_endian.h */
41 #define __cpu_to_le64(x) ((__u64) (x))
42 #define __le64_to_cpu(x) ((__u64) (x))
43 #define __cpu_to_le32(x) ((__u32) (x))
44 #define __le32_to_cpu(x) ((__u32) (x))
45 #define __cpu_to_le16(x) ((__u16) (x))
46 #define __le16_to_cpu(x) ((__u16) (x))
47
48 /* include/linux/reiser_fs.h */
49 /* This is the new super block of a journaling reiserfs system */
50 struct reiserfs_super_block
51 {
52 __u32 s_block_count; /* blocks count */
53 __u32 s_free_blocks; /* free blocks count */
54 __u32 s_root_block; /* root block number */
55 __u32 s_journal_block; /* journal block number */
56 __u32 s_journal_dev; /* journal device number */
57 __u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */
58 __u32 s_journal_trans_max; /* max number of blocks in a transaction. */
59 __u32 s_journal_magic; /* random value made on fs creation */
60 __u32 s_journal_max_batch; /* max number of blocks to batch into a trans */
61 __u32 s_journal_max_commit_age; /* in seconds, how old can an async commit be */
62 __u32 s_journal_max_trans_age; /* in seconds, how old can a transaction be */
63 __u16 s_blocksize; /* block size */
64 __u16 s_oid_maxsize; /* max size of object id array */
65 __u16 s_oid_cursize; /* current size of object id array */
66 __u16 s_state; /* valid or error */
67 char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */
68 __u16 s_tree_height; /* height of disk tree */
69 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */
70 __u16 s_version;
71 char s_unused[128]; /* zero filled by mkreiserfs */
72 };
73
74 #define REISERFS_MAX_SUPPORTED_VERSION 2
75 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
76 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
77 #define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs"
78
79 #define MAX_HEIGHT 7
80
81 /* must be correct to keep the desc and commit structs at 4k */
82 #define JOURNAL_TRANS_HALF 1018
83
84 /* first block written in a commit. */
85 struct reiserfs_journal_desc {
86 __u32 j_trans_id; /* id of commit */
87 __u32 j_len; /* length of commit. len +1 is the commit block */
88 __u32 j_mount_id; /* mount id of this trans*/
89 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */
90 char j_magic[12];
91 };
92
93 /* last block written in a commit */
94 struct reiserfs_journal_commit {
95 __u32 j_trans_id; /* must match j_trans_id from the desc block */
96 __u32 j_len; /* ditto */
97 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */
98 char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
99 };
100
101 /* this header block gets written whenever a transaction is considered
102 fully flushed, and is more recent than the last fully flushed
103 transaction.
104 fully flushed means all the log blocks and all the real blocks are
105 on disk, and this transaction does not need to be replayed.
106 */
107 struct reiserfs_journal_header {
108 /* id of last fully flushed transaction */
109 __u32 j_last_flush_trans_id;
110 /* offset in the log of where to start replay after a crash */
111 __u32 j_first_unflushed_offset;
112 /* mount id to detect very old transactions */
113 __u32 j_mount_id;
114 };
115
116 /* magic string to find desc blocks in the journal */
117 #define JOURNAL_DESC_MAGIC "ReIsErLB"
118
119
120 /*
121 * directories use this key as well as old files
122 */
123 struct offset_v1
124 {
125 /*
126 * for regular files this is the offset to the first byte of the
127 * body, contained in the object-item, as measured from the start of
128 * the entire body of the object.
129 *
130 * for directory entries, k_offset consists of hash derived from
131 * hashing the name and using few bits (23 or more) of the resulting
132 * hash, and generation number that allows distinguishing names with
133 * hash collisions. If number of collisions overflows generation
134 * number, we return EEXIST. High order bit is 0 always
135 */
136 __u32 k_offset;
137 __u32 k_uniqueness;
138 };
139
140 struct offset_v2
141 {
142 /*
143 * for regular files this is the offset to the first byte of the
144 * body, contained in the object-item, as measured from the start of
145 * the entire body of the object.
146 *
147 * for directory entries, k_offset consists of hash derived from
148 * hashing the name and using few bits (23 or more) of the resulting
149 * hash, and generation number that allows distinguishing names with
150 * hash collisions. If number of collisions overflows generation
151 * number, we return EEXIST. High order bit is 0 always
152 */
153 __u64 k_offset:60;
154 __u64 k_type: 4;
155 };
156
157
158 struct key
159 {
160 /* packing locality: by default parent directory object id */
161 __u32 k_dir_id;
162 /* object identifier */
163 __u32 k_objectid;
164 /* the offset and node type (old and new form) */
165 union
166 {
167 struct offset_v1 v1;
168 struct offset_v2 v2;
169 }
170 u;
171 };
172
173 #define KEY_SIZE (sizeof (struct key))
174
175 /* Header of a disk block. More precisely, header of a formatted leaf
176 or internal node, and not the header of an unformatted node. */
177 struct block_head
178 {
179 __u16 blk_level; /* Level of a block in the tree. */
180 __u16 blk_nr_item; /* Number of keys/items in a block. */
181 __u16 blk_free_space; /* Block free space in bytes. */
182 struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
183 only) */
184 };
185 #define BLKH_SIZE (sizeof (struct block_head))
186 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
187
188 struct item_head
189 {
190 struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/
191
192 union
193 {
194 __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this
195 is an indirect item. This equals 0xFFFF iff this is a direct item or
196 stat data item. Note that the key, not this field, is used to determine
197 the item type, and thus which field this union contains. */
198 __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory
199 entries in the directory item. */
200 }
201 u;
202 __u16 ih_item_len; /* total size of the item body */
203 __u16 ih_item_location; /* an offset to the item body within the block */
204 __u16 ih_version; /* ITEM_VERSION_1 for all old items,
205 ITEM_VERSION_2 for new ones.
206 Highest bit is set by fsck
207 temporary, cleaned after all done */
208 };
209 /* size of item header */
210 #define IH_SIZE (sizeof (struct item_head))
211
212 #define ITEM_VERSION_1 0
213 #define ITEM_VERSION_2 1
214 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \
215 ? (ih)->ih_key.u.v1.k_offset \
216 : (ih)->ih_key.u.v2.k_offset)
217
218 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \
219 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \
220 : (ih)->ih_key.u.v2.k_type == V2_##type)
221
222 struct disk_child
223 {
224 __u32 dc_block_number; /* Disk child's block number. */
225 __u16 dc_size; /* Disk child's used space. */
226 };
227
228 #define DC_SIZE (sizeof (struct disk_child))
229
230 /* Stat Data on disk.
231 *
232 * Note that reiserfs has two different forms of stat data. Luckily
233 * the fields needed by grub are at the same position.
234 */
235 struct stat_data
236 {
237 __u16 sd_mode; /* file type, permissions */
238 __u16 sd_notused1[3]; /* fields not needed by reiserfs */
239 __u32 sd_size; /* file size */
240 __u32 sd_size_hi; /* file size high 32 bits (since version 2) */
241 };
242
243 struct reiserfs_de_head
244 {
245 __u32 deh_offset; /* third component of the directory entry key */
246 __u32 deh_dir_id; /* objectid of the parent directory of the
247 object, that is referenced by directory entry */
248 __u32 deh_objectid;/* objectid of the object, that is referenced by
249 directory entry */
250 __u16 deh_location;/* offset of name in the whole item */
251 __u16 deh_state; /* whether 1) entry contains stat data (for
252 future), and 2) whether entry is hidden
253 (unlinked) */
254 };
255
256 #define DEH_SIZE (sizeof (struct reiserfs_de_head))
257
258 #define DEH_Statdata (1 << 0) /* not used now */
259 #define DEH_Visible (1 << 2)
260
261 #define SD_OFFSET 0
262 #define SD_UNIQUENESS 0
263 #define DOT_OFFSET 1
264 #define DOT_DOT_OFFSET 2
265 #define DIRENTRY_UNIQUENESS 500
266
267 #define V1_TYPE_STAT_DATA 0x0
268 #define V1_TYPE_DIRECT 0xffffffff
269 #define V1_TYPE_INDIRECT 0xfffffffe
270 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd
271 #define V2_TYPE_STAT_DATA 0
272 #define V2_TYPE_INDIRECT 1
273 #define V2_TYPE_DIRECT 2
274 #define V2_TYPE_DIRENTRY 3
275
276 #define REISERFS_ROOT_OBJECTID 2
277 #define REISERFS_ROOT_PARENT_OBJECTID 1
278 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
279 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
280 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
281 #define REISERFS_OLD_BLOCKSIZE 4096
282
283 #define S_ISREG(mode) (((mode) & 0170000) == 0100000)
284 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
285 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000)
286
287 #define PATH_MAX 1024 /* include/linux/limits.h */
288 #define MAX_LINK_COUNT 5 /* number of symbolic links to follow */
289
290 /* The size of the node cache */
291 #define FSYSREISER_CACHE_SIZE 24*1024
292 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
293 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3
294
295 /* Info about currently opened file */
296 struct fsys_reiser_fileinfo
297 {
298 __u32 k_dir_id;
299 __u32 k_objectid;
300 };
301
302 /* In memory info about the currently mounted filesystem */
303 struct fsys_reiser_info
304 {
305 /* The last read item head */
306 struct item_head *current_ih;
307 /* The last read item */
308 char *current_item;
309 /* The information for the currently opened file */
310 struct fsys_reiser_fileinfo fileinfo;
311 /* The start of the journal */
312 __u32 journal_block;
313 /* The size of the journal */
314 __u32 journal_block_count;
315 /* The first valid descriptor block in journal
316 (relative to journal_block) */
317 __u32 journal_first_desc;
318
319 /* The ReiserFS version. */
320 __u16 version;
321 /* The current depth of the reiser tree. */
322 __u16 tree_depth;
323 /* SECTOR_SIZE << blocksize_shift == blocksize. */
324 __u8 blocksize_shift;
325 /* 1 << full_blocksize_shift == blocksize. */
326 __u8 fullblocksize_shift;
327 /* The reiserfs block size (must be a power of 2) */
328 __u16 blocksize;
329 /* The number of cached tree nodes */
330 __u16 cached_slots;
331 /* The number of valid transactions in journal */
332 __u16 journal_transactions;
333
334 unsigned int blocks[MAX_HEIGHT];
335 unsigned int next_key_nr[MAX_HEIGHT];
336 };
337
338 /* The cached s+tree blocks in FSYS_BUF, see below
339 * for a more detailed description.
340 */
341 #define ROOT ((char *) FSYS_BUF)
342 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
343 #define LEAF CACHE (DISK_LEAF_NODE_LEVEL)
344
345 #define BLOCKHEAD(cache) ((struct block_head *) cache)
346 #define ITEMHEAD ((struct item_head *) ((char *) LEAF + BLKH_SIZE))
347 #define KEY(cache) ((struct key *) ((char *) cache + BLKH_SIZE))
348 #define DC(cache) ((struct disk_child *) \
349 ((char *) cache + BLKH_SIZE + KEY_SIZE * nr_item))
350 /* The fsys_reiser_info block.
351 */
352 #define INFO \
353 ((struct fsys_reiser_info *) ((char *) FSYS_BUF + FSYSREISER_CACHE_SIZE))
354 /*
355 * The journal cache. For each transaction it contains the number of
356 * blocks followed by the real block numbers of this transaction.
357 *
358 * If the block numbers of some transaction won't fit in this space,
359 * this list is stopped with a 0xffffffff marker and the remaining
360 * uncommitted transactions aren't cached.
361 */
362 #define JOURNAL_START ((__u32 *) (INFO + 1))
363 #define JOURNAL_END ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))
364
365 #define log2 grub_log2
366
367 static __inline__ int
is_power_of_two(unsigned long word)368 is_power_of_two (unsigned long word)
369 {
370 return (word & -word) == word;
371 }
372
373 static int
journal_read(fsi_file_t * ffi,int block,int len,char * buffer)374 journal_read (fsi_file_t *ffi, int block, int len, char *buffer)
375 {
376 return devread (ffi, (INFO->journal_block + block) << INFO->blocksize_shift,
377 0, len, buffer);
378 }
379
380 /* Read a block from ReiserFS file system, taking the journal into
381 * account. If the block nr is in the journal, the block from the
382 * journal taken.
383 */
384 static int
block_read(fsi_file_t * ffi,int blockNr,int start,int len,char * buffer)385 block_read (fsi_file_t *ffi, int blockNr, int start, int len, char *buffer)
386 {
387 int transactions = INFO->journal_transactions;
388 int desc_block = INFO->journal_first_desc;
389 int journal_mask = INFO->journal_block_count - 1;
390 int translatedNr = blockNr;
391 __u32 *journal_table = JOURNAL_START;
392 while (transactions-- > 0)
393 {
394 int i = 0;
395 int j_len;
396 if (*journal_table != 0xffffffff)
397 {
398 /* Search for the blockNr in cached journal */
399 j_len = *journal_table++;
400 while (i++ < j_len)
401 {
402 if (*journal_table++ == blockNr)
403 {
404 journal_table += j_len - i;
405 goto found;
406 }
407 }
408 }
409 else
410 {
411 /* This is the end of cached journal marker. The remaining
412 * transactions are still on disk.
413 */
414 struct reiserfs_journal_desc desc;
415 struct reiserfs_journal_commit commit;
416
417 if (! journal_read (ffi, desc_block, sizeof (desc), (char *) &desc))
418 return 0;
419
420 j_len = desc.j_len;
421 while (i < j_len && i < JOURNAL_TRANS_HALF)
422 if (desc.j_realblock[i++] == blockNr)
423 goto found;
424
425 if (j_len >= JOURNAL_TRANS_HALF)
426 {
427 int commit_block = (desc_block + 1 + j_len) & journal_mask;
428 if (! journal_read (ffi, commit_block,
429 sizeof (commit), (char *) &commit))
430 return 0;
431 while (i < j_len)
432 if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr)
433 goto found;
434 }
435 }
436 goto not_found;
437
438 found:
439 translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
440 #ifdef REISERDEBUG
441 printf ("block_read: block %d is mapped to journal block %d.\n",
442 blockNr, translatedNr - INFO->journal_block);
443 #endif
444 /* We must continue the search, as this block may be overwritten
445 * in later transactions.
446 */
447 not_found:
448 desc_block = (desc_block + 2 + j_len) & journal_mask;
449 }
450 return devread (ffi, translatedNr << INFO->blocksize_shift, start, len, buffer);
451 }
452
453 /* Init the journal data structure. We try to cache as much as
454 * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
455 * we can still read the rest from the disk on demand.
456 *
457 * The first number of valid transactions and the descriptor block of the
458 * first valid transaction are held in INFO. The transactions are all
459 * adjacent, but we must take care of the journal wrap around.
460 */
461 static int
journal_init(fsi_file_t * ffi)462 journal_init (fsi_file_t *ffi)
463 {
464 unsigned int block_count = INFO->journal_block_count;
465 unsigned int desc_block;
466 unsigned int commit_block;
467 unsigned int next_trans_id;
468 struct reiserfs_journal_header header;
469 struct reiserfs_journal_desc desc;
470 struct reiserfs_journal_commit commit;
471 __u32 *journal_table = JOURNAL_START;
472
473 journal_read (ffi, block_count, sizeof (header), (char *) &header);
474 desc_block = header.j_first_unflushed_offset;
475 if (desc_block >= block_count)
476 return 0;
477
478 INFO->journal_first_desc = desc_block;
479 next_trans_id = header.j_last_flush_trans_id + 1;
480
481 #ifdef REISERDEBUG
482 printf ("journal_init: last flushed %d\n",
483 header.j_last_flush_trans_id);
484 #endif
485
486 while (1)
487 {
488 journal_read (ffi, desc_block, sizeof (desc), (char *) &desc);
489 if (substring (JOURNAL_DESC_MAGIC, desc.j_magic)
490 || desc.j_trans_id != next_trans_id
491 || desc.j_mount_id != header.j_mount_id)
492 /* no more valid transactions */
493 break;
494
495 commit_block = (desc_block + desc.j_len + 1) & (block_count - 1);
496 journal_read (ffi, commit_block, sizeof (commit), (char *) &commit);
497 if (desc.j_trans_id != commit.j_trans_id
498 || desc.j_len != commit.j_len)
499 /* no more valid transactions */
500 break;
501
502 #ifdef REISERDEBUG
503 printf ("Found valid transaction %d/%d at %d.\n",
504 desc.j_trans_id, desc.j_mount_id, desc_block);
505 #endif
506
507 next_trans_id++;
508 if (journal_table < JOURNAL_END)
509 {
510 if ((journal_table + 1 + desc.j_len) >= JOURNAL_END)
511 {
512 /* The table is almost full; mark the end of the cached
513 * journal.*/
514 *journal_table = 0xffffffff;
515 journal_table = JOURNAL_END;
516 }
517 else
518 {
519 int i;
520 /* Cache the length and the realblock numbers in the table.
521 * The block number of descriptor can easily be computed.
522 * and need not to be stored here.
523 */
524 *journal_table++ = desc.j_len;
525 for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++)
526 {
527 *journal_table++ = desc.j_realblock[i];
528 #ifdef REISERDEBUG
529 printf ("block %d is in journal %d.\n",
530 desc.j_realblock[i], desc_block);
531 #endif
532 }
533 for ( ; i < desc.j_len; i++)
534 {
535 *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF];
536 #ifdef REISERDEBUG
537 printf ("block %d is in journal %d.\n",
538 commit.j_realblock[i-JOURNAL_TRANS_HALF],
539 desc_block);
540 #endif
541 }
542 }
543 }
544 desc_block = (commit_block + 1) & (block_count - 1);
545 }
546 #ifdef REISERDEBUG
547 printf ("Transaction %d/%d at %d isn't valid.\n",
548 desc.j_trans_id, desc.j_mount_id, desc_block);
549 #endif
550
551 INFO->journal_transactions
552 = next_trans_id - header.j_last_flush_trans_id - 1;
553 return errnum == 0;
554 }
555
556 /* check filesystem types and read superblock into memory buffer */
557 static int
reiserfs_mount(fsi_file_t * ffi,const char * options)558 reiserfs_mount (fsi_file_t *ffi, const char *options)
559 {
560 struct reiserfs_super_block super;
561 int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
562
563 if (/*part_length < superblock + (sizeof (super) >> SECTOR_BITS)
564 || */ !devread (ffi, superblock, 0, sizeof (struct reiserfs_super_block),
565 (char *) &super)
566 || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
567 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
568 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
569 || (/* check that this is not a copy inside the journal log */
570 super.s_journal_block * super.s_blocksize
571 <= REISERFS_DISK_OFFSET_IN_BYTES))
572 {
573 /* Try old super block position */
574 superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
575 if (/*part_length < superblock + (sizeof (super) >> SECTOR_BITS)
576 || */ ! devread (ffi, superblock, 0, sizeof (struct reiserfs_super_block),
577 (char *) &super))
578 return 0;
579
580 if (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
581 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
582 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
583 {
584 /* pre journaling super block ? */
585 if (substring (REISERFS_SUPER_MAGIC_STRING,
586 (char*) ((char *) &super + 20)) > 0)
587 return 0;
588
589 super.s_blocksize = REISERFS_OLD_BLOCKSIZE;
590 super.s_journal_block = 0;
591 super.s_version = 0;
592 }
593 }
594
595 /* check the version number. */
596 if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION)
597 return 0;
598
599 INFO->version = super.s_version;
600 INFO->blocksize = super.s_blocksize;
601 INFO->fullblocksize_shift = log2 (super.s_blocksize);
602 INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS;
603 INFO->cached_slots =
604 (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1;
605
606 #ifdef REISERDEBUG
607 printf ("reiserfs_mount: version=%d, blocksize=%d\n",
608 INFO->version, INFO->blocksize);
609 #endif /* REISERDEBUG */
610
611 /* Clear node cache. */
612 memset (INFO->blocks, 0, sizeof (INFO->blocks));
613
614 if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE
615 || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE
616 || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize)
617 return 0;
618
619 /* Initialize journal code. If something fails we end with zero
620 * journal_transactions, so we don't access the journal at all.
621 */
622 INFO->journal_transactions = 0;
623 if (super.s_journal_block != 0 && super.s_journal_dev == 0)
624 {
625 INFO->journal_block = super.s_journal_block;
626 INFO->journal_block_count = super.s_journal_size;
627 if (is_power_of_two (INFO->journal_block_count))
628 journal_init (ffi);
629
630 /* Read in super block again, maybe it is in the journal */
631 block_read (ffi, superblock >> INFO->blocksize_shift,
632 0, sizeof (struct reiserfs_super_block), (char *) &super);
633 }
634
635 if (! block_read (ffi, super.s_root_block, 0, INFO->blocksize, (char*) ROOT))
636 return 0;
637
638 INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level;
639
640 #ifdef REISERDEBUG
641 printf ("root read_in: block=%d, depth=%d\n",
642 super.s_root_block, INFO->tree_depth);
643 #endif /* REISERDEBUG */
644
645 if (INFO->tree_depth >= MAX_HEIGHT)
646 return 0;
647 if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL)
648 {
649 /* There is only one node in the whole filesystem,
650 * which is simultanously leaf and root */
651 memcpy (LEAF, ROOT, INFO->blocksize);
652 }
653 return 1;
654 }
655
656 /***************** TREE ACCESSING METHODS *****************************/
657
658 /* I assume you are familiar with the ReiserFS tree, if not go to
659 * http://www.namesys.com/content_table.html
660 *
661 * My tree node cache is organized as following
662 * 0 ROOT node
663 * 1 LEAF node (if the ROOT is also a LEAF it is copied here
664 * 2-n other nodes on current path from bottom to top.
665 * if there is not enough space in the cache, the top most are
666 * omitted.
667 *
668 * I have only two methods to find a key in the tree:
669 * search_stat(dir_id, objectid) searches for the stat entry (always
670 * the first entry) of an object.
671 * next_key() gets the next key in tree order.
672 *
673 * This means, that I can only sequential reads of files are
674 * efficient, but this really doesn't hurt for grub.
675 */
676
677 /* Read in the node at the current path and depth into the node cache.
678 * You must set INFO->blocks[depth] before.
679 */
680 static char *
read_tree_node(fsi_file_t * ffi,unsigned int blockNr,int depth)681 read_tree_node (fsi_file_t *ffi, unsigned int blockNr, int depth)
682 {
683 char* cache = CACHE(depth);
684 int num_cached = INFO->cached_slots;
685 if (depth < num_cached)
686 {
687 /* This is the cached part of the path. Check if same block is
688 * needed.
689 */
690 if (blockNr == INFO->blocks[depth])
691 return cache;
692 }
693 else
694 cache = CACHE(num_cached);
695
696 #ifdef REISERDEBUG
697 printf (" next read_in: block=%d (depth=%d)\n",
698 blockNr, depth);
699 #endif /* REISERDEBUG */
700 if (! block_read (ffi, blockNr, 0, INFO->blocksize, cache))
701 return 0;
702 /* Make sure it has the right node level */
703 if (BLOCKHEAD (cache)->blk_level != depth)
704 {
705 errnum = ERR_FSYS_CORRUPT;
706 return 0;
707 }
708
709 INFO->blocks[depth] = blockNr;
710 return cache;
711 }
712
713 /* Get the next key, i.e. the key following the last retrieved key in
714 * tree order. INFO->current_ih and
715 * INFO->current_info are adapted accordingly. */
716 static int
next_key(fsi_file_t * ffi)717 next_key (fsi_file_t *ffi)
718 {
719 int depth;
720 struct item_head *ih = INFO->current_ih + 1;
721 char *cache;
722
723 #ifdef REISERDEBUG
724 printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n",
725 INFO->current_ih->ih_key.k_dir_id,
726 INFO->current_ih->ih_key.k_objectid,
727 INFO->current_ih->ih_key.u.v1.k_offset,
728 INFO->current_ih->ih_key.u.v1.k_uniqueness,
729 INFO->current_ih->ih_version);
730 #endif /* REISERDEBUG */
731
732 if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item])
733 {
734 depth = DISK_LEAF_NODE_LEVEL;
735 /* The last item, was the last in the leaf node.
736 * Read in the next block
737 */
738 do
739 {
740 if (depth == INFO->tree_depth)
741 {
742 /* There are no more keys at all.
743 * Return a dummy item with MAX_KEY */
744 ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key;
745 goto found;
746 }
747 depth++;
748 #ifdef REISERDEBUG
749 printf (" depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]);
750 #endif /* REISERDEBUG */
751 }
752 while (INFO->next_key_nr[depth] == 0);
753
754 if (depth == INFO->tree_depth)
755 cache = ROOT;
756 else if (depth <= INFO->cached_slots)
757 cache = CACHE (depth);
758 else
759 {
760 cache = read_tree_node (ffi, INFO->blocks[depth], depth);
761 if (! cache)
762 return 0;
763 }
764
765 do
766 {
767 int nr_item = BLOCKHEAD (cache)->blk_nr_item;
768 int key_nr = INFO->next_key_nr[depth]++;
769 #ifdef REISERDEBUG
770 printf (" depth=%d, i=%d/%d\n", depth, key_nr, nr_item);
771 #endif /* REISERDEBUG */
772 if (key_nr == nr_item)
773 /* This is the last item in this block, set the next_key_nr to 0 */
774 INFO->next_key_nr[depth] = 0;
775
776 cache = read_tree_node (ffi, DC (cache)[key_nr].dc_block_number, --depth);
777 if (! cache)
778 return 0;
779 }
780 while (depth > DISK_LEAF_NODE_LEVEL);
781
782 ih = ITEMHEAD;
783 }
784 found:
785 INFO->current_ih = ih;
786 INFO->current_item = &LEAF[ih->ih_item_location];
787 #ifdef REISERDEBUG
788 printf (" new ih: key %d:%d:%d:%d version:%d\n",
789 INFO->current_ih->ih_key.k_dir_id,
790 INFO->current_ih->ih_key.k_objectid,
791 INFO->current_ih->ih_key.u.v1.k_offset,
792 INFO->current_ih->ih_key.u.v1.k_uniqueness,
793 INFO->current_ih->ih_version);
794 #endif /* REISERDEBUG */
795 return 1;
796 }
797
798 /* preconditions: reiserfs_mount already executed, therefore
799 * INFO block is valid
800 * returns: 0 if error (errnum is set),
801 * nonzero iff we were able to find the key successfully.
802 * postconditions: on a nonzero return, the current_ih and
803 * current_item fields describe the key that equals the
804 * searched key. INFO->next_key contains the next key after
805 * the searched key.
806 * side effects: messes around with the cache.
807 */
808 static int
search_stat(fsi_file_t * ffi,__u32 dir_id,__u32 objectid)809 search_stat (fsi_file_t *ffi, __u32 dir_id, __u32 objectid)
810 {
811 char *cache;
812 int depth;
813 int nr_item;
814 int i;
815 struct item_head *ih;
816 #ifdef REISERDEBUG
817 printf ("search_stat:\n key %d:%d:0:0\n", dir_id, objectid);
818 #endif /* REISERDEBUG */
819
820 depth = INFO->tree_depth;
821 cache = ROOT;
822
823 while (depth > DISK_LEAF_NODE_LEVEL)
824 {
825 struct key *key;
826 nr_item = BLOCKHEAD (cache)->blk_nr_item;
827
828 key = KEY (cache);
829
830 for (i = 0; i < nr_item; i++)
831 {
832 if (key->k_dir_id > dir_id
833 || (key->k_dir_id == dir_id
834 && (key->k_objectid > objectid
835 || (key->k_objectid == objectid
836 && (key->u.v1.k_offset
837 | key->u.v1.k_uniqueness) > 0))))
838 break;
839 key++;
840 }
841
842 #ifdef REISERDEBUG
843 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
844 #endif /* REISERDEBUG */
845 INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1;
846 cache = read_tree_node (ffi, DC (cache)[i].dc_block_number, --depth);
847 if (! cache)
848 return 0;
849 }
850
851 /* cache == LEAF */
852 nr_item = BLOCKHEAD (LEAF)->blk_nr_item;
853 ih = ITEMHEAD;
854 for (i = 0; i < nr_item; i++)
855 {
856 if (ih->ih_key.k_dir_id == dir_id
857 && ih->ih_key.k_objectid == objectid
858 && ih->ih_key.u.v1.k_offset == 0
859 && ih->ih_key.u.v1.k_uniqueness == 0)
860 {
861 #ifdef REISERDEBUG
862 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
863 #endif /* REISERDEBUG */
864 INFO->current_ih = ih;
865 INFO->current_item = &LEAF[ih->ih_item_location];
866 return 1;
867 }
868 ih++;
869 }
870 errnum = ERR_FSYS_CORRUPT;
871 return 0;
872 }
873
874 static int
reiserfs_read(fsi_file_t * ffi,char * buf,int len)875 reiserfs_read (fsi_file_t *ffi, char *buf, int len)
876 {
877 unsigned int blocksize;
878 unsigned int offset;
879 unsigned int to_read;
880 char *prev_buf = buf;
881
882 #ifdef REISERDEBUG
883 printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
884 filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1);
885 #endif /* REISERDEBUG */
886
887 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
888 || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1)
889 {
890 search_stat (ffi, INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid);
891 goto get_next_key;
892 }
893
894 while (! errnum)
895 {
896 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid)
897 break;
898
899 offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1;
900 blocksize = INFO->current_ih->ih_item_len;
901
902 #ifdef REISERDEBUG
903 printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
904 filepos, len, offset, blocksize);
905 #endif /* REISERDEBUG */
906
907 if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT)
908 && offset < blocksize)
909 {
910 #ifdef REISERDEBUG
911 printf ("direct_read: offset=%d, blocksize=%d\n",
912 offset, blocksize);
913 #endif /* REISERDEBUG */
914 to_read = blocksize - offset;
915 if (to_read > len)
916 to_read = len;
917
918 if (disk_read_hook != NULL)
919 {
920 disk_read_func = disk_read_hook;
921
922 block_read (ffi, INFO->blocks[DISK_LEAF_NODE_LEVEL],
923 (INFO->current_item - LEAF + offset), to_read, buf);
924
925 disk_read_func = NULL;
926 }
927 else
928 memcpy (buf, INFO->current_item + offset, to_read);
929 goto update_buf_len;
930 }
931 else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT))
932 {
933 blocksize = (blocksize >> 2) << INFO->fullblocksize_shift;
934 #ifdef REISERDEBUG
935 printf ("indirect_read: offset=%d, blocksize=%d\n",
936 offset, blocksize);
937 #endif /* REISERDEBUG */
938
939 while (offset < blocksize)
940 {
941 __u32 blocknr = ((__u32 *) INFO->current_item)
942 [offset >> INFO->fullblocksize_shift];
943 int blk_offset = offset & (INFO->blocksize-1);
944
945 to_read = INFO->blocksize - blk_offset;
946 if (to_read > len)
947 to_read = len;
948
949 disk_read_func = disk_read_hook;
950
951 /* Journal is only for meta data. Data blocks can be read
952 * directly without using block_read
953 */
954 devread (ffi, blocknr << INFO->blocksize_shift,
955 blk_offset, to_read, buf);
956
957 disk_read_func = NULL;
958 update_buf_len:
959 len -= to_read;
960 buf += to_read;
961 offset += to_read;
962 filepos += to_read;
963 if (len == 0)
964 goto done;
965 }
966 }
967 get_next_key:
968 next_key (ffi);
969 }
970 done:
971 return errnum ? 0 : buf - prev_buf;
972 }
973
974
975 /* preconditions: reiserfs_mount already executed, therefore
976 * INFO block is valid
977 * returns: 0 if error, nonzero iff we were able to find the file successfully
978 * postconditions: on a nonzero return, INFO->fileinfo contains the info
979 * of the file we were trying to look up, filepos is 0 and filemax is
980 * the size of the file.
981 */
982 static int
reiserfs_dir(fsi_file_t * ffi,char * dirname)983 reiserfs_dir (fsi_file_t *ffi, char *dirname)
984 {
985 struct reiserfs_de_head *de_head;
986 char *rest, ch;
987 __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
988 #ifndef STAGE1_5
989 int do_possibilities = 0;
990 #endif /* ! STAGE1_5 */
991 char linkbuf[PATH_MAX]; /* buffer for following symbolic links */
992 int link_count = 0;
993 int mode;
994
995 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
996 objectid = REISERFS_ROOT_OBJECTID;
997
998 while (1)
999 {
1000 #ifdef REISERDEBUG
1001 printf ("dirname=%s\n", dirname);
1002 #endif /* REISERDEBUG */
1003
1004 /* Search for the stat info first. */
1005 if (! search_stat (ffi, dir_id, objectid))
1006 return 0;
1007
1008 #ifdef REISERDEBUG
1009 printf ("sd_mode=%x sd_size=%d\n",
1010 ((struct stat_data *) INFO->current_item)->sd_mode,
1011 ((struct stat_data *) INFO->current_item)->sd_size);
1012 #endif /* REISERDEBUG */
1013
1014 mode = ((struct stat_data *) INFO->current_item)->sd_mode;
1015
1016 /* If we've got a symbolic link, then chase it. */
1017 if (S_ISLNK (mode))
1018 {
1019 int len;
1020 if (++link_count > MAX_LINK_COUNT)
1021 {
1022 errnum = ERR_SYMLINK_LOOP;
1023 return 0;
1024 }
1025
1026 /* Get the symlink size. */
1027 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1028
1029 /* Find out how long our remaining name is. */
1030 len = 0;
1031 while (dirname[len] && !isspace ((uint8_t)dirname[len]))
1032 len++;
1033
1034 if (filemax + len > sizeof (linkbuf) - 1)
1035 {
1036 errnum = ERR_FILELENGTH;
1037 return 0;
1038 }
1039
1040 /* Copy the remaining name to the end of the symlink data.
1041 Note that DIRNAME and LINKBUF may overlap! */
1042 grub_memmove (linkbuf + filemax, dirname, len+1);
1043
1044 INFO->fileinfo.k_dir_id = dir_id;
1045 INFO->fileinfo.k_objectid = objectid;
1046 filepos = 0;
1047 if (! next_key (ffi)
1048 || reiserfs_read (ffi, linkbuf, filemax) != filemax)
1049 {
1050 if (! errnum)
1051 errnum = ERR_FSYS_CORRUPT;
1052 return 0;
1053 }
1054
1055 #ifdef REISERDEBUG
1056 printf ("symlink=%s\n", linkbuf);
1057 #endif /* REISERDEBUG */
1058
1059 dirname = linkbuf;
1060 if (*dirname == '/')
1061 {
1062 /* It's an absolute link, so look it up in root. */
1063 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1064 objectid = REISERFS_ROOT_OBJECTID;
1065 }
1066 else
1067 {
1068 /* Relative, so look it up in our parent directory. */
1069 dir_id = parent_dir_id;
1070 objectid = parent_objectid;
1071 }
1072
1073 /* Now lookup the new name. */
1074 continue;
1075 }
1076
1077 /* if we have a real file (and we're not just printing possibilities),
1078 then this is where we want to exit */
1079
1080 if (! *dirname || isspace ((uint8_t)*dirname))
1081 {
1082 if (! S_ISREG (mode))
1083 {
1084 errnum = ERR_BAD_FILETYPE;
1085 return 0;
1086 }
1087
1088 filepos = 0;
1089 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1090
1091 /* If this is a new stat data and size is > 4GB set filemax to
1092 * maximum
1093 */
1094 if (INFO->current_ih->ih_version == ITEM_VERSION_2
1095 && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0)
1096 filemax = 0xffffffff;
1097
1098 INFO->fileinfo.k_dir_id = dir_id;
1099 INFO->fileinfo.k_objectid = objectid;
1100 return next_key (ffi);
1101 }
1102
1103 /* continue with the file/directory name interpretation */
1104 while (*dirname == '/')
1105 dirname++;
1106 if (! S_ISDIR (mode))
1107 {
1108 errnum = ERR_BAD_FILETYPE;
1109 return 0;
1110 }
1111 for (rest = dirname; (ch = *rest) && ! isspace ((uint8_t)ch) && ch != '/'; rest++);
1112 *rest = 0;
1113
1114 # ifndef STAGE1_5
1115 if (print_possibilities && ch != '/')
1116 do_possibilities = 1;
1117 # endif /* ! STAGE1_5 */
1118
1119 while (1)
1120 {
1121 char *name_end;
1122 int num_entries;
1123
1124 if (! next_key (ffi))
1125 return 0;
1126 #ifdef REISERDEBUG
1127 printf ("ih: key %d:%d:%d:%d version:%d\n",
1128 INFO->current_ih->ih_key.k_dir_id,
1129 INFO->current_ih->ih_key.k_objectid,
1130 INFO->current_ih->ih_key.u.v1.k_offset,
1131 INFO->current_ih->ih_key.u.v1.k_uniqueness,
1132 INFO->current_ih->ih_version);
1133 #endif /* REISERDEBUG */
1134
1135 if (INFO->current_ih->ih_key.k_objectid != objectid)
1136 break;
1137
1138 name_end = INFO->current_item + INFO->current_ih->ih_item_len;
1139 de_head = (struct reiserfs_de_head *) INFO->current_item;
1140 num_entries = INFO->current_ih->u.ih_entry_count;
1141 while (num_entries > 0)
1142 {
1143 char *filename = INFO->current_item + de_head->deh_location;
1144 char tmp = *name_end;
1145 if ((de_head->deh_state & DEH_Visible))
1146 {
1147 int cmp;
1148 /* Directory names in ReiserFS are not null
1149 * terminated. We write a temporary 0 behind it.
1150 * NOTE: that this may overwrite the first block in
1151 * the tree cache. That doesn't hurt as long as we
1152 * don't call next_key () in between.
1153 */
1154 *name_end = 0;
1155 cmp = substring (dirname, filename);
1156 *name_end = tmp;
1157 # ifndef STAGE1_5
1158 if (do_possibilities)
1159 {
1160 if (cmp <= 0)
1161 {
1162 if (print_possibilities > 0)
1163 print_possibilities = -print_possibilities;
1164 *name_end = 0;
1165 print_a_completion (filename);
1166 *name_end = tmp;
1167 }
1168 }
1169 else
1170 # endif /* ! STAGE1_5 */
1171 if (cmp == 0)
1172 goto found;
1173 }
1174 /* The beginning of this name marks the end of the next name.
1175 */
1176 name_end = filename;
1177 de_head++;
1178 num_entries--;
1179 }
1180 }
1181
1182 # ifndef STAGE1_5
1183 if (print_possibilities < 0)
1184 return 1;
1185 # endif /* ! STAGE1_5 */
1186
1187 errnum = ERR_FILE_NOT_FOUND;
1188 *rest = ch;
1189 return 0;
1190
1191 found:
1192
1193 *rest = ch;
1194 dirname = rest;
1195
1196 parent_dir_id = dir_id;
1197 parent_objectid = objectid;
1198 dir_id = de_head->deh_dir_id;
1199 objectid = de_head->deh_objectid;
1200 }
1201 }
1202
1203 int
reiserfs_embed(fsi_file_t * ffi,int * start_sector,int needed_sectors)1204 reiserfs_embed (fsi_file_t *ffi, int *start_sector, int needed_sectors)
1205 {
1206 struct reiserfs_super_block super;
1207 int num_sectors;
1208
1209 if (! devread (ffi, REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0,
1210 sizeof (struct reiserfs_super_block), (char *) &super))
1211 return 0;
1212
1213 *start_sector = 1; /* reserve first sector for stage1 */
1214 if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1215 || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1216 || substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) <= 0)
1217 && (/* check that this is not a super block copy inside
1218 * the journal log */
1219 super.s_journal_block * super.s_blocksize
1220 > REISERFS_DISK_OFFSET_IN_BYTES))
1221 num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1222 else
1223 num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1224
1225 return (needed_sectors <= num_sectors);
1226 }
1227
1228 fsi_plugin_ops_t *
fsi_init_plugin(int version,fsi_plugin_t * fp,const char ** name)1229 fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name)
1230 {
1231 static fsig_plugin_ops_t ops = {
1232 FSIMAGE_PLUGIN_VERSION,
1233 .fpo_mount = reiserfs_mount,
1234 .fpo_dir = reiserfs_dir,
1235 .fpo_read = reiserfs_read
1236 };
1237
1238 *name = "reiserfs";
1239 return (fsig_init(fp, &ops));
1240 }
1241