1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
2 /*
3 * GRUB -- GRand Unified Bootloader
4 * Copyright (C) 2000, 2001 Free Software Foundation, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <xenfsimage_grub.h>
21
22 #undef REISERDEBUG
23
24 /* Some parts of this code (mainly the structures and defines) are
25 * from the original reiser fs code, as found in the linux kernel.
26 */
27
28 /* include/asm-i386/types.h */
29 typedef __signed__ char __s8;
30 typedef unsigned char __u8;
31 typedef __signed__ short __s16;
32 typedef unsigned short __u16;
33 typedef __signed__ int __s32;
34 typedef unsigned int __u32;
35 typedef unsigned long long __u64;
36
37 /* linux/posix_type.h */
38 typedef long linux_off_t;
39
40 /* linux/little_endian.h */
41 #define __cpu_to_le64(x) ((__u64) (x))
42 #define __le64_to_cpu(x) ((__u64) (x))
43 #define __cpu_to_le32(x) ((__u32) (x))
44 #define __le32_to_cpu(x) ((__u32) (x))
45 #define __cpu_to_le16(x) ((__u16) (x))
46 #define __le16_to_cpu(x) ((__u16) (x))
47
48 /* include/linux/reiser_fs.h */
49 /* This is the new super block of a journaling reiserfs system */
50 struct reiserfs_super_block
51 {
52 __u32 s_block_count; /* blocks count */
53 __u32 s_free_blocks; /* free blocks count */
54 __u32 s_root_block; /* root block number */
55 __u32 s_journal_block; /* journal block number */
56 __u32 s_journal_dev; /* journal device number */
57 __u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */
58 __u32 s_journal_trans_max; /* max number of blocks in a transaction. */
59 __u32 s_journal_magic; /* random value made on fs creation */
60 __u32 s_journal_max_batch; /* max number of blocks to batch into a trans */
61 __u32 s_journal_max_commit_age; /* in seconds, how old can an async commit be */
62 __u32 s_journal_max_trans_age; /* in seconds, how old can a transaction be */
63 __u16 s_blocksize; /* block size */
64 __u16 s_oid_maxsize; /* max size of object id array */
65 __u16 s_oid_cursize; /* current size of object id array */
66 __u16 s_state; /* valid or error */
67 char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */
68 __u16 s_tree_height; /* height of disk tree */
69 __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */
70 __u16 s_version;
71 char s_unused[128]; /* zero filled by mkreiserfs */
72 };
73
74 #define REISERFS_MAX_SUPPORTED_VERSION 2
75 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
76 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
77 #define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs"
78
79 #define MAX_HEIGHT 7
80
81 /* must be correct to keep the desc and commit structs at 4k */
82 #define JOURNAL_TRANS_HALF 1018
83
84 /* first block written in a commit. */
85 struct reiserfs_journal_desc {
86 __u32 j_trans_id; /* id of commit */
87 __u32 j_len; /* length of commit. len +1 is the commit block */
88 __u32 j_mount_id; /* mount id of this trans*/
89 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */
90 char j_magic[12];
91 };
92
93 /* last block written in a commit */
94 struct reiserfs_journal_commit {
95 __u32 j_trans_id; /* must match j_trans_id from the desc block */
96 __u32 j_len; /* ditto */
97 __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */
98 char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
99 };
100
101 /* this header block gets written whenever a transaction is considered
102 fully flushed, and is more recent than the last fully flushed
103 transaction.
104 fully flushed means all the log blocks and all the real blocks are
105 on disk, and this transaction does not need to be replayed.
106 */
107 struct reiserfs_journal_header {
108 /* id of last fully flushed transaction */
109 __u32 j_last_flush_trans_id;
110 /* offset in the log of where to start replay after a crash */
111 __u32 j_first_unflushed_offset;
112 /* mount id to detect very old transactions */
113 __u32 j_mount_id;
114 };
115
116 /* magic string to find desc blocks in the journal */
117 #define JOURNAL_DESC_MAGIC "ReIsErLB"
118
119
120 /*
121 * directories use this key as well as old files
122 */
123 struct offset_v1
124 {
125 /*
126 * for regular files this is the offset to the first byte of the
127 * body, contained in the object-item, as measured from the start of
128 * the entire body of the object.
129 *
130 * for directory entries, k_offset consists of hash derived from
131 * hashing the name and using few bits (23 or more) of the resulting
132 * hash, and generation number that allows distinguishing names with
133 * hash collisions. If number of collisions overflows generation
134 * number, we return EEXIST. High order bit is 0 always
135 */
136 __u32 k_offset;
137 __u32 k_uniqueness;
138 };
139
140 struct offset_v2
141 {
142 /*
143 * for regular files this is the offset to the first byte of the
144 * body, contained in the object-item, as measured from the start of
145 * the entire body of the object.
146 *
147 * for directory entries, k_offset consists of hash derived from
148 * hashing the name and using few bits (23 or more) of the resulting
149 * hash, and generation number that allows distinguishing names with
150 * hash collisions. If number of collisions overflows generation
151 * number, we return EEXIST. High order bit is 0 always
152 */
153 __u64 k_offset:60;
154 __u64 k_type: 4;
155 };
156
157
158 struct key
159 {
160 /* packing locality: by default parent directory object id */
161 __u32 k_dir_id;
162 /* object identifier */
163 __u32 k_objectid;
164 /* the offset and node type (old and new form) */
165 union
166 {
167 struct offset_v1 v1;
168 struct offset_v2 v2;
169 }
170 u;
171 };
172
173 #define KEY_SIZE (sizeof (struct key))
174
175 /* Header of a disk block. More precisely, header of a formatted leaf
176 or internal node, and not the header of an unformatted node. */
177 struct block_head
178 {
179 __u16 blk_level; /* Level of a block in the tree. */
180 __u16 blk_nr_item; /* Number of keys/items in a block. */
181 __u16 blk_free_space; /* Block free space in bytes. */
182 struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
183 only) */
184 };
185 #define BLKH_SIZE (sizeof (struct block_head))
186 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
187
188 struct item_head
189 {
190 struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/
191
192 union
193 {
194 __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this
195 is an indirect item. This equals 0xFFFF iff this is a direct item or
196 stat data item. Note that the key, not this field, is used to determine
197 the item type, and thus which field this union contains. */
198 __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory
199 entries in the directory item. */
200 }
201 u;
202 __u16 ih_item_len; /* total size of the item body */
203 __u16 ih_item_location; /* an offset to the item body within the block */
204 __u16 ih_version; /* ITEM_VERSION_1 for all old items,
205 ITEM_VERSION_2 for new ones.
206 Highest bit is set by fsck
207 temporary, cleaned after all done */
208 };
209 /* size of item header */
210 #define IH_SIZE (sizeof (struct item_head))
211
212 #define ITEM_VERSION_1 0
213 #define ITEM_VERSION_2 1
214 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \
215 ? (ih)->ih_key.u.v1.k_offset \
216 : (ih)->ih_key.u.v2.k_offset)
217
218 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \
219 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \
220 : (ih)->ih_key.u.v2.k_type == V2_##type)
221
222 struct disk_child
223 {
224 __u32 dc_block_number; /* Disk child's block number. */
225 __u16 dc_size; /* Disk child's used space. */
226 };
227
228 #define DC_SIZE (sizeof (struct disk_child))
229
230 /* Stat Data on disk.
231 *
232 * Note that reiserfs has two different forms of stat data. Luckily
233 * the fields needed by grub are at the same position.
234 */
235 struct stat_data
236 {
237 __u16 sd_mode; /* file type, permissions */
238 __u16 sd_notused1[3]; /* fields not needed by reiserfs */
239 __u32 sd_size; /* file size */
240 __u32 sd_size_hi; /* file size high 32 bits (since version 2) */
241 };
242
243 struct reiserfs_de_head
244 {
245 __u32 deh_offset; /* third component of the directory entry key */
246 __u32 deh_dir_id; /* objectid of the parent directory of the
247 object, that is referenced by directory entry */
248 __u32 deh_objectid;/* objectid of the object, that is referenced by
249 directory entry */
250 __u16 deh_location;/* offset of name in the whole item */
251 __u16 deh_state; /* whether 1) entry contains stat data (for
252 future), and 2) whether entry is hidden
253 (unlinked) */
254 };
255
256 #define DEH_SIZE (sizeof (struct reiserfs_de_head))
257
258 #define DEH_Statdata (1 << 0) /* not used now */
259 #define DEH_Visible (1 << 2)
260
261 #define SD_OFFSET 0
262 #define SD_UNIQUENESS 0
263 #define DOT_OFFSET 1
264 #define DOT_DOT_OFFSET 2
265 #define DIRENTRY_UNIQUENESS 500
266
267 #define V1_TYPE_STAT_DATA 0x0
268 #define V1_TYPE_DIRECT 0xffffffff
269 #define V1_TYPE_INDIRECT 0xfffffffe
270 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd
271 #define V2_TYPE_STAT_DATA 0
272 #define V2_TYPE_INDIRECT 1
273 #define V2_TYPE_DIRECT 2
274 #define V2_TYPE_DIRENTRY 3
275
276 #define REISERFS_ROOT_OBJECTID 2
277 #define REISERFS_ROOT_PARENT_OBJECTID 1
278 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
279 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
280 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
281 #define REISERFS_OLD_BLOCKSIZE 4096
282
283 #define S_ISREG(mode) (((mode) & 0170000) == 0100000)
284 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
285 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000)
286
287 #ifndef PATH_MAX
288 #define PATH_MAX 1024 /* include/linux/limits.h */
289 #endif
290 #define MAX_LINK_COUNT 5 /* number of symbolic links to follow */
291
292 /* The size of the node cache */
293 #define FSYSREISER_CACHE_SIZE 24*1024
294 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
295 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3
296
297 /* Info about currently opened file */
298 struct fsys_reiser_fileinfo
299 {
300 __u32 k_dir_id;
301 __u32 k_objectid;
302 };
303
304 /* In memory info about the currently mounted filesystem */
305 struct fsys_reiser_info
306 {
307 /* The last read item head */
308 struct item_head *current_ih;
309 /* The last read item */
310 char *current_item;
311 /* The information for the currently opened file */
312 struct fsys_reiser_fileinfo fileinfo;
313 /* The start of the journal */
314 __u32 journal_block;
315 /* The size of the journal */
316 __u32 journal_block_count;
317 /* The first valid descriptor block in journal
318 (relative to journal_block) */
319 __u32 journal_first_desc;
320
321 /* The ReiserFS version. */
322 __u16 version;
323 /* The current depth of the reiser tree. */
324 __u16 tree_depth;
325 /* SECTOR_SIZE << blocksize_shift == blocksize. */
326 __u8 blocksize_shift;
327 /* 1 << full_blocksize_shift == blocksize. */
328 __u8 fullblocksize_shift;
329 /* The reiserfs block size (must be a power of 2) */
330 __u16 blocksize;
331 /* The number of cached tree nodes */
332 __u16 cached_slots;
333 /* The number of valid transactions in journal */
334 __u16 journal_transactions;
335
336 unsigned int blocks[MAX_HEIGHT];
337 unsigned int next_key_nr[MAX_HEIGHT];
338 };
339
340 /* The cached s+tree blocks in FSYS_BUF, see below
341 * for a more detailed description.
342 */
343 #define ROOT ((char *) FSYS_BUF)
344 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
345 #define LEAF CACHE (DISK_LEAF_NODE_LEVEL)
346
347 #define BLOCKHEAD(cache) ((struct block_head *) cache)
348 #define ITEMHEAD ((struct item_head *) ((char *) LEAF + BLKH_SIZE))
349 #define KEY(cache) ((struct key *) ((char *) cache + BLKH_SIZE))
350 #define DC(cache) ((struct disk_child *) \
351 ((char *) cache + BLKH_SIZE + KEY_SIZE * nr_item))
352 /* The fsys_reiser_info block.
353 */
354 #define INFO \
355 ((struct fsys_reiser_info *) ((char *) FSYS_BUF + FSYSREISER_CACHE_SIZE))
356 /*
357 * The journal cache. For each transaction it contains the number of
358 * blocks followed by the real block numbers of this transaction.
359 *
360 * If the block numbers of some transaction won't fit in this space,
361 * this list is stopped with a 0xffffffff marker and the remaining
362 * uncommitted transactions aren't cached.
363 */
364 #define JOURNAL_START ((__u32 *) (INFO + 1))
365 #define JOURNAL_END ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))
366
367 #define log2 grub_log2
368
369 static __inline__ int
is_power_of_two(unsigned long word)370 is_power_of_two (unsigned long word)
371 {
372 return (word & -word) == word;
373 }
374
375 static int
journal_read(fsi_file_t * ffi,int block,int len,char * buffer)376 journal_read (fsi_file_t *ffi, int block, int len, char *buffer)
377 {
378 return devread (ffi, (INFO->journal_block + block) << INFO->blocksize_shift,
379 0, len, buffer);
380 }
381
382 /* Read a block from ReiserFS file system, taking the journal into
383 * account. If the block nr is in the journal, the block from the
384 * journal taken.
385 */
386 static int
block_read(fsi_file_t * ffi,int blockNr,int start,int len,char * buffer)387 block_read (fsi_file_t *ffi, int blockNr, int start, int len, char *buffer)
388 {
389 int transactions = INFO->journal_transactions;
390 int desc_block = INFO->journal_first_desc;
391 int journal_mask = INFO->journal_block_count - 1;
392 int translatedNr = blockNr;
393 __u32 *journal_table = JOURNAL_START;
394 while (transactions-- > 0)
395 {
396 int i = 0;
397 int j_len;
398 if (*journal_table != 0xffffffff)
399 {
400 /* Search for the blockNr in cached journal */
401 j_len = *journal_table++;
402 while (i++ < j_len)
403 {
404 if (*journal_table++ == blockNr)
405 {
406 journal_table += j_len - i;
407 goto found;
408 }
409 }
410 }
411 else
412 {
413 /* This is the end of cached journal marker. The remaining
414 * transactions are still on disk.
415 */
416 struct reiserfs_journal_desc desc;
417 struct reiserfs_journal_commit commit;
418
419 if (! journal_read (ffi, desc_block, sizeof (desc), (char *) &desc))
420 return 0;
421
422 j_len = desc.j_len;
423 while (i < j_len && i < JOURNAL_TRANS_HALF)
424 if (desc.j_realblock[i++] == blockNr)
425 goto found;
426
427 if (j_len >= JOURNAL_TRANS_HALF)
428 {
429 int commit_block = (desc_block + 1 + j_len) & journal_mask;
430 if (! journal_read (ffi, commit_block,
431 sizeof (commit), (char *) &commit))
432 return 0;
433 while (i < j_len)
434 if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr)
435 goto found;
436 }
437 }
438 goto not_found;
439
440 found:
441 translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
442 #ifdef REISERDEBUG
443 printf ("block_read: block %d is mapped to journal block %d.\n",
444 blockNr, translatedNr - INFO->journal_block);
445 #endif
446 /* We must continue the search, as this block may be overwritten
447 * in later transactions.
448 */
449 not_found:
450 desc_block = (desc_block + 2 + j_len) & journal_mask;
451 }
452 return devread (ffi, translatedNr << INFO->blocksize_shift, start, len, buffer);
453 }
454
455 /* Init the journal data structure. We try to cache as much as
456 * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
457 * we can still read the rest from the disk on demand.
458 *
459 * The first number of valid transactions and the descriptor block of the
460 * first valid transaction are held in INFO. The transactions are all
461 * adjacent, but we must take care of the journal wrap around.
462 */
463 static int
journal_init(fsi_file_t * ffi)464 journal_init (fsi_file_t *ffi)
465 {
466 unsigned int block_count = INFO->journal_block_count;
467 unsigned int desc_block;
468 unsigned int commit_block;
469 unsigned int next_trans_id;
470 struct reiserfs_journal_header header;
471 struct reiserfs_journal_desc desc;
472 struct reiserfs_journal_commit commit;
473 __u32 *journal_table = JOURNAL_START;
474
475 journal_read (ffi, block_count, sizeof (header), (char *) &header);
476 desc_block = header.j_first_unflushed_offset;
477 if (desc_block >= block_count)
478 return 0;
479
480 INFO->journal_first_desc = desc_block;
481 next_trans_id = header.j_last_flush_trans_id + 1;
482
483 #ifdef REISERDEBUG
484 printf ("journal_init: last flushed %d\n",
485 header.j_last_flush_trans_id);
486 #endif
487
488 while (1)
489 {
490 journal_read (ffi, desc_block, sizeof (desc), (char *) &desc);
491 if (substring (JOURNAL_DESC_MAGIC, desc.j_magic)
492 || desc.j_trans_id != next_trans_id
493 || desc.j_mount_id != header.j_mount_id)
494 /* no more valid transactions */
495 break;
496
497 commit_block = (desc_block + desc.j_len + 1) & (block_count - 1);
498 journal_read (ffi, commit_block, sizeof (commit), (char *) &commit);
499 if (desc.j_trans_id != commit.j_trans_id
500 || desc.j_len != commit.j_len)
501 /* no more valid transactions */
502 break;
503
504 #ifdef REISERDEBUG
505 printf ("Found valid transaction %d/%d at %d.\n",
506 desc.j_trans_id, desc.j_mount_id, desc_block);
507 #endif
508
509 next_trans_id++;
510 if (journal_table < JOURNAL_END)
511 {
512 if ((journal_table + 1 + desc.j_len) >= JOURNAL_END)
513 {
514 /* The table is almost full; mark the end of the cached
515 * journal.*/
516 *journal_table = 0xffffffff;
517 journal_table = JOURNAL_END;
518 }
519 else
520 {
521 int i;
522 /* Cache the length and the realblock numbers in the table.
523 * The block number of descriptor can easily be computed.
524 * and need not to be stored here.
525 */
526 *journal_table++ = desc.j_len;
527 for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++)
528 {
529 *journal_table++ = desc.j_realblock[i];
530 #ifdef REISERDEBUG
531 printf ("block %d is in journal %d.\n",
532 desc.j_realblock[i], desc_block);
533 #endif
534 }
535 for ( ; i < desc.j_len; i++)
536 {
537 *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF];
538 #ifdef REISERDEBUG
539 printf ("block %d is in journal %d.\n",
540 commit.j_realblock[i-JOURNAL_TRANS_HALF],
541 desc_block);
542 #endif
543 }
544 }
545 }
546 desc_block = (commit_block + 1) & (block_count - 1);
547 }
548 #ifdef REISERDEBUG
549 printf ("Transaction %d/%d at %d isn't valid.\n",
550 desc.j_trans_id, desc.j_mount_id, desc_block);
551 #endif
552
553 INFO->journal_transactions
554 = next_trans_id - header.j_last_flush_trans_id - 1;
555 return errnum == 0;
556 }
557
558 /* check filesystem types and read superblock into memory buffer */
559 static int
reiserfs_mount(fsi_file_t * ffi,const char * options)560 reiserfs_mount (fsi_file_t *ffi, const char *options)
561 {
562 struct reiserfs_super_block super;
563 int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
564
565 if (/*part_length < superblock + (sizeof (super) >> SECTOR_BITS)
566 || */ !devread (ffi, superblock, 0, sizeof (struct reiserfs_super_block),
567 (char *) &super)
568 || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
569 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
570 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
571 || (/* check that this is not a copy inside the journal log */
572 super.s_journal_block * super.s_blocksize
573 <= REISERFS_DISK_OFFSET_IN_BYTES))
574 {
575 /* Try old super block position */
576 superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
577 if (/*part_length < superblock + (sizeof (super) >> SECTOR_BITS)
578 || */ ! devread (ffi, superblock, 0, sizeof (struct reiserfs_super_block),
579 (char *) &super))
580 return 0;
581
582 if (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
583 && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
584 && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
585 {
586 /* pre journaling super block ? */
587 if (substring (REISERFS_SUPER_MAGIC_STRING,
588 (char*) ((char *) &super + 20)) > 0)
589 return 0;
590
591 super.s_blocksize = REISERFS_OLD_BLOCKSIZE;
592 super.s_journal_block = 0;
593 super.s_version = 0;
594 }
595 }
596
597 /* check the version number. */
598 if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION)
599 return 0;
600
601 INFO->version = super.s_version;
602 INFO->blocksize = super.s_blocksize;
603 INFO->fullblocksize_shift = log2 (super.s_blocksize);
604 INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS;
605 INFO->cached_slots =
606 (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1;
607
608 #ifdef REISERDEBUG
609 printf ("reiserfs_mount: version=%d, blocksize=%d\n",
610 INFO->version, INFO->blocksize);
611 #endif /* REISERDEBUG */
612
613 /* Clear node cache. */
614 memset (INFO->blocks, 0, sizeof (INFO->blocks));
615
616 if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE
617 || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE
618 || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize)
619 return 0;
620
621 /* Initialize journal code. If something fails we end with zero
622 * journal_transactions, so we don't access the journal at all.
623 */
624 INFO->journal_transactions = 0;
625 if (super.s_journal_block != 0 && super.s_journal_dev == 0)
626 {
627 INFO->journal_block = super.s_journal_block;
628 INFO->journal_block_count = super.s_journal_size;
629 if (is_power_of_two (INFO->journal_block_count))
630 journal_init (ffi);
631
632 /* Read in super block again, maybe it is in the journal */
633 block_read (ffi, superblock >> INFO->blocksize_shift,
634 0, sizeof (struct reiserfs_super_block), (char *) &super);
635 }
636
637 if (! block_read (ffi, super.s_root_block, 0, INFO->blocksize, (char*) ROOT))
638 return 0;
639
640 INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level;
641
642 #ifdef REISERDEBUG
643 printf ("root read_in: block=%d, depth=%d\n",
644 super.s_root_block, INFO->tree_depth);
645 #endif /* REISERDEBUG */
646
647 if (INFO->tree_depth >= MAX_HEIGHT)
648 return 0;
649 if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL)
650 {
651 /* There is only one node in the whole filesystem,
652 * which is simultanously leaf and root */
653 memcpy (LEAF, ROOT, INFO->blocksize);
654 }
655 return 1;
656 }
657
658 /***************** TREE ACCESSING METHODS *****************************/
659
660 /* I assume you are familiar with the ReiserFS tree, if not go to
661 * http://www.namesys.com/content_table.html
662 *
663 * My tree node cache is organized as following
664 * 0 ROOT node
665 * 1 LEAF node (if the ROOT is also a LEAF it is copied here
666 * 2-n other nodes on current path from bottom to top.
667 * if there is not enough space in the cache, the top most are
668 * omitted.
669 *
670 * I have only two methods to find a key in the tree:
671 * search_stat(dir_id, objectid) searches for the stat entry (always
672 * the first entry) of an object.
673 * next_key() gets the next key in tree order.
674 *
675 * This means, that I can only sequential reads of files are
676 * efficient, but this really doesn't hurt for grub.
677 */
678
679 /* Read in the node at the current path and depth into the node cache.
680 * You must set INFO->blocks[depth] before.
681 */
682 static char *
read_tree_node(fsi_file_t * ffi,unsigned int blockNr,int depth)683 read_tree_node (fsi_file_t *ffi, unsigned int blockNr, int depth)
684 {
685 char* cache = CACHE(depth);
686 int num_cached = INFO->cached_slots;
687 if (depth < num_cached)
688 {
689 /* This is the cached part of the path. Check if same block is
690 * needed.
691 */
692 if (blockNr == INFO->blocks[depth])
693 return cache;
694 }
695 else
696 cache = CACHE(num_cached);
697
698 #ifdef REISERDEBUG
699 printf (" next read_in: block=%d (depth=%d)\n",
700 blockNr, depth);
701 #endif /* REISERDEBUG */
702 if (! block_read (ffi, blockNr, 0, INFO->blocksize, cache))
703 return 0;
704 /* Make sure it has the right node level */
705 if (BLOCKHEAD (cache)->blk_level != depth)
706 {
707 errnum = ERR_FSYS_CORRUPT;
708 return 0;
709 }
710
711 INFO->blocks[depth] = blockNr;
712 return cache;
713 }
714
715 /* Get the next key, i.e. the key following the last retrieved key in
716 * tree order. INFO->current_ih and
717 * INFO->current_info are adapted accordingly. */
718 static int
next_key(fsi_file_t * ffi)719 next_key (fsi_file_t *ffi)
720 {
721 int depth;
722 struct item_head *ih = INFO->current_ih + 1;
723 char *cache;
724
725 #ifdef REISERDEBUG
726 printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n",
727 INFO->current_ih->ih_key.k_dir_id,
728 INFO->current_ih->ih_key.k_objectid,
729 INFO->current_ih->ih_key.u.v1.k_offset,
730 INFO->current_ih->ih_key.u.v1.k_uniqueness,
731 INFO->current_ih->ih_version);
732 #endif /* REISERDEBUG */
733
734 if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item])
735 {
736 depth = DISK_LEAF_NODE_LEVEL;
737 /* The last item, was the last in the leaf node.
738 * Read in the next block
739 */
740 do
741 {
742 if (depth == INFO->tree_depth)
743 {
744 /* There are no more keys at all.
745 * Return a dummy item with MAX_KEY */
746 ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key;
747 goto found;
748 }
749 depth++;
750 #ifdef REISERDEBUG
751 printf (" depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]);
752 #endif /* REISERDEBUG */
753 }
754 while (INFO->next_key_nr[depth] == 0);
755
756 if (depth == INFO->tree_depth)
757 cache = ROOT;
758 else if (depth <= INFO->cached_slots)
759 cache = CACHE (depth);
760 else
761 {
762 cache = read_tree_node (ffi, INFO->blocks[depth], depth);
763 if (! cache)
764 return 0;
765 }
766
767 do
768 {
769 int nr_item = BLOCKHEAD (cache)->blk_nr_item;
770 int key_nr = INFO->next_key_nr[depth]++;
771 #ifdef REISERDEBUG
772 printf (" depth=%d, i=%d/%d\n", depth, key_nr, nr_item);
773 #endif /* REISERDEBUG */
774 if (key_nr == nr_item)
775 /* This is the last item in this block, set the next_key_nr to 0 */
776 INFO->next_key_nr[depth] = 0;
777
778 cache = read_tree_node (ffi, DC (cache)[key_nr].dc_block_number, --depth);
779 if (! cache)
780 return 0;
781 }
782 while (depth > DISK_LEAF_NODE_LEVEL);
783
784 ih = ITEMHEAD;
785 }
786 found:
787 INFO->current_ih = ih;
788 INFO->current_item = &LEAF[ih->ih_item_location];
789 #ifdef REISERDEBUG
790 printf (" new ih: key %d:%d:%d:%d version:%d\n",
791 INFO->current_ih->ih_key.k_dir_id,
792 INFO->current_ih->ih_key.k_objectid,
793 INFO->current_ih->ih_key.u.v1.k_offset,
794 INFO->current_ih->ih_key.u.v1.k_uniqueness,
795 INFO->current_ih->ih_version);
796 #endif /* REISERDEBUG */
797 return 1;
798 }
799
800 /* preconditions: reiserfs_mount already executed, therefore
801 * INFO block is valid
802 * returns: 0 if error (errnum is set),
803 * nonzero iff we were able to find the key successfully.
804 * postconditions: on a nonzero return, the current_ih and
805 * current_item fields describe the key that equals the
806 * searched key. INFO->next_key contains the next key after
807 * the searched key.
808 * side effects: messes around with the cache.
809 */
810 static int
search_stat(fsi_file_t * ffi,__u32 dir_id,__u32 objectid)811 search_stat (fsi_file_t *ffi, __u32 dir_id, __u32 objectid)
812 {
813 char *cache;
814 int depth;
815 int nr_item;
816 int i;
817 struct item_head *ih;
818 #ifdef REISERDEBUG
819 printf ("search_stat:\n key %d:%d:0:0\n", dir_id, objectid);
820 #endif /* REISERDEBUG */
821
822 depth = INFO->tree_depth;
823 cache = ROOT;
824
825 while (depth > DISK_LEAF_NODE_LEVEL)
826 {
827 struct key *key;
828 nr_item = BLOCKHEAD (cache)->blk_nr_item;
829
830 key = KEY (cache);
831
832 for (i = 0; i < nr_item; i++)
833 {
834 if (key->k_dir_id > dir_id
835 || (key->k_dir_id == dir_id
836 && (key->k_objectid > objectid
837 || (key->k_objectid == objectid
838 && (key->u.v1.k_offset
839 | key->u.v1.k_uniqueness) > 0))))
840 break;
841 key++;
842 }
843
844 #ifdef REISERDEBUG
845 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
846 #endif /* REISERDEBUG */
847 INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1;
848 cache = read_tree_node (ffi, DC (cache)[i].dc_block_number, --depth);
849 if (! cache)
850 return 0;
851 }
852
853 /* cache == LEAF */
854 nr_item = BLOCKHEAD (LEAF)->blk_nr_item;
855 ih = ITEMHEAD;
856 for (i = 0; i < nr_item; i++)
857 {
858 if (ih->ih_key.k_dir_id == dir_id
859 && ih->ih_key.k_objectid == objectid
860 && ih->ih_key.u.v1.k_offset == 0
861 && ih->ih_key.u.v1.k_uniqueness == 0)
862 {
863 #ifdef REISERDEBUG
864 printf (" depth=%d, i=%d/%d\n", depth, i, nr_item);
865 #endif /* REISERDEBUG */
866 INFO->current_ih = ih;
867 INFO->current_item = &LEAF[ih->ih_item_location];
868 return 1;
869 }
870 ih++;
871 }
872 errnum = ERR_FSYS_CORRUPT;
873 return 0;
874 }
875
876 static int
reiserfs_read(fsi_file_t * ffi,char * buf,int len)877 reiserfs_read (fsi_file_t *ffi, char *buf, int len)
878 {
879 unsigned int blocksize;
880 unsigned int offset;
881 unsigned int to_read;
882 char *prev_buf = buf;
883
884 #ifdef REISERDEBUG
885 printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
886 filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1);
887 #endif /* REISERDEBUG */
888
889 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
890 || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1)
891 {
892 search_stat (ffi, INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid);
893 goto get_next_key;
894 }
895
896 while (! errnum)
897 {
898 if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid)
899 break;
900
901 offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1;
902 blocksize = INFO->current_ih->ih_item_len;
903
904 #ifdef REISERDEBUG
905 printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
906 filepos, len, offset, blocksize);
907 #endif /* REISERDEBUG */
908
909 if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT)
910 && offset < blocksize)
911 {
912 #ifdef REISERDEBUG
913 printf ("direct_read: offset=%d, blocksize=%d\n",
914 offset, blocksize);
915 #endif /* REISERDEBUG */
916 to_read = blocksize - offset;
917 if (to_read > len)
918 to_read = len;
919
920 if (disk_read_hook != NULL)
921 {
922 disk_read_func = disk_read_hook;
923
924 block_read (ffi, INFO->blocks[DISK_LEAF_NODE_LEVEL],
925 (INFO->current_item - LEAF + offset), to_read, buf);
926
927 disk_read_func = NULL;
928 }
929 else
930 memcpy (buf, INFO->current_item + offset, to_read);
931 goto update_buf_len;
932 }
933 else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT))
934 {
935 blocksize = (blocksize >> 2) << INFO->fullblocksize_shift;
936 #ifdef REISERDEBUG
937 printf ("indirect_read: offset=%d, blocksize=%d\n",
938 offset, blocksize);
939 #endif /* REISERDEBUG */
940
941 while (offset < blocksize)
942 {
943 __u32 blocknr = ((__u32 *) INFO->current_item)
944 [offset >> INFO->fullblocksize_shift];
945 int blk_offset = offset & (INFO->blocksize-1);
946
947 to_read = INFO->blocksize - blk_offset;
948 if (to_read > len)
949 to_read = len;
950
951 disk_read_func = disk_read_hook;
952
953 /* Journal is only for meta data. Data blocks can be read
954 * directly without using block_read
955 */
956 devread (ffi, blocknr << INFO->blocksize_shift,
957 blk_offset, to_read, buf);
958
959 disk_read_func = NULL;
960 update_buf_len:
961 len -= to_read;
962 buf += to_read;
963 offset += to_read;
964 filepos += to_read;
965 if (len == 0)
966 goto done;
967 }
968 }
969 get_next_key:
970 next_key (ffi);
971 }
972 done:
973 return errnum ? 0 : buf - prev_buf;
974 }
975
976
977 /* preconditions: reiserfs_mount already executed, therefore
978 * INFO block is valid
979 * returns: 0 if error, nonzero iff we were able to find the file successfully
980 * postconditions: on a nonzero return, INFO->fileinfo contains the info
981 * of the file we were trying to look up, filepos is 0 and filemax is
982 * the size of the file.
983 */
984 static int
reiserfs_dir(fsi_file_t * ffi,char * dirname)985 reiserfs_dir (fsi_file_t *ffi, char *dirname)
986 {
987 struct reiserfs_de_head *de_head;
988 char *rest, ch;
989 __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
990 #ifndef STAGE1_5
991 int do_possibilities = 0;
992 #endif /* ! STAGE1_5 */
993 char linkbuf[PATH_MAX]; /* buffer for following symbolic links */
994 int link_count = 0;
995 int mode;
996
997 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
998 objectid = REISERFS_ROOT_OBJECTID;
999
1000 while (1)
1001 {
1002 #ifdef REISERDEBUG
1003 printf ("dirname=%s\n", dirname);
1004 #endif /* REISERDEBUG */
1005
1006 /* Search for the stat info first. */
1007 if (! search_stat (ffi, dir_id, objectid))
1008 return 0;
1009
1010 #ifdef REISERDEBUG
1011 printf ("sd_mode=%x sd_size=%d\n",
1012 ((struct stat_data *) INFO->current_item)->sd_mode,
1013 ((struct stat_data *) INFO->current_item)->sd_size);
1014 #endif /* REISERDEBUG */
1015
1016 mode = ((struct stat_data *) INFO->current_item)->sd_mode;
1017
1018 /* If we've got a symbolic link, then chase it. */
1019 if (S_ISLNK (mode))
1020 {
1021 int len;
1022 if (++link_count > MAX_LINK_COUNT)
1023 {
1024 errnum = ERR_SYMLINK_LOOP;
1025 return 0;
1026 }
1027
1028 /* Get the symlink size. */
1029 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1030
1031 /* Find out how long our remaining name is. */
1032 len = 0;
1033 while (dirname[len] && !isspace ((uint8_t)dirname[len]))
1034 len++;
1035
1036 if (filemax + len > sizeof (linkbuf) - 1)
1037 {
1038 errnum = ERR_FILELENGTH;
1039 return 0;
1040 }
1041
1042 /* Copy the remaining name to the end of the symlink data.
1043 Note that DIRNAME and LINKBUF may overlap! */
1044 grub_memmove (linkbuf + filemax, dirname, len+1);
1045
1046 INFO->fileinfo.k_dir_id = dir_id;
1047 INFO->fileinfo.k_objectid = objectid;
1048 filepos = 0;
1049 if (! next_key (ffi)
1050 || reiserfs_read (ffi, linkbuf, filemax) != filemax)
1051 {
1052 if (! errnum)
1053 errnum = ERR_FSYS_CORRUPT;
1054 return 0;
1055 }
1056
1057 #ifdef REISERDEBUG
1058 printf ("symlink=%s\n", linkbuf);
1059 #endif /* REISERDEBUG */
1060
1061 dirname = linkbuf;
1062 if (*dirname == '/')
1063 {
1064 /* It's an absolute link, so look it up in root. */
1065 dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1066 objectid = REISERFS_ROOT_OBJECTID;
1067 }
1068 else
1069 {
1070 /* Relative, so look it up in our parent directory. */
1071 dir_id = parent_dir_id;
1072 objectid = parent_objectid;
1073 }
1074
1075 /* Now lookup the new name. */
1076 continue;
1077 }
1078
1079 /* if we have a real file (and we're not just printing possibilities),
1080 then this is where we want to exit */
1081
1082 if (! *dirname || isspace ((uint8_t)*dirname))
1083 {
1084 if (! S_ISREG (mode))
1085 {
1086 errnum = ERR_BAD_FILETYPE;
1087 return 0;
1088 }
1089
1090 filepos = 0;
1091 filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1092
1093 /* If this is a new stat data and size is > 4GB set filemax to
1094 * maximum
1095 */
1096 if (INFO->current_ih->ih_version == ITEM_VERSION_2
1097 && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0)
1098 filemax = 0xffffffff;
1099
1100 INFO->fileinfo.k_dir_id = dir_id;
1101 INFO->fileinfo.k_objectid = objectid;
1102 return next_key (ffi);
1103 }
1104
1105 /* continue with the file/directory name interpretation */
1106 while (*dirname == '/')
1107 dirname++;
1108 if (! S_ISDIR (mode))
1109 {
1110 errnum = ERR_BAD_FILETYPE;
1111 return 0;
1112 }
1113 for (rest = dirname; (ch = *rest) && ! isspace ((uint8_t)ch) && ch != '/'; rest++);
1114 *rest = 0;
1115
1116 # ifndef STAGE1_5
1117 if (print_possibilities && ch != '/')
1118 do_possibilities = 1;
1119 # endif /* ! STAGE1_5 */
1120
1121 while (1)
1122 {
1123 char *name_end;
1124 int num_entries;
1125
1126 if (! next_key (ffi))
1127 return 0;
1128 #ifdef REISERDEBUG
1129 printf ("ih: key %d:%d:%d:%d version:%d\n",
1130 INFO->current_ih->ih_key.k_dir_id,
1131 INFO->current_ih->ih_key.k_objectid,
1132 INFO->current_ih->ih_key.u.v1.k_offset,
1133 INFO->current_ih->ih_key.u.v1.k_uniqueness,
1134 INFO->current_ih->ih_version);
1135 #endif /* REISERDEBUG */
1136
1137 if (INFO->current_ih->ih_key.k_objectid != objectid)
1138 break;
1139
1140 name_end = INFO->current_item + INFO->current_ih->ih_item_len;
1141 de_head = (struct reiserfs_de_head *) INFO->current_item;
1142 num_entries = INFO->current_ih->u.ih_entry_count;
1143 while (num_entries > 0)
1144 {
1145 char *filename = INFO->current_item + de_head->deh_location;
1146 char tmp = *name_end;
1147 if ((de_head->deh_state & DEH_Visible))
1148 {
1149 int cmp;
1150 /* Directory names in ReiserFS are not null
1151 * terminated. We write a temporary 0 behind it.
1152 * NOTE: that this may overwrite the first block in
1153 * the tree cache. That doesn't hurt as long as we
1154 * don't call next_key () in between.
1155 */
1156 *name_end = 0;
1157 cmp = substring (dirname, filename);
1158 *name_end = tmp;
1159 # ifndef STAGE1_5
1160 if (do_possibilities)
1161 {
1162 if (cmp <= 0)
1163 {
1164 if (print_possibilities > 0)
1165 print_possibilities = -print_possibilities;
1166 *name_end = 0;
1167 print_a_completion (filename);
1168 *name_end = tmp;
1169 }
1170 }
1171 else
1172 # endif /* ! STAGE1_5 */
1173 if (cmp == 0)
1174 goto found;
1175 }
1176 /* The beginning of this name marks the end of the next name.
1177 */
1178 name_end = filename;
1179 de_head++;
1180 num_entries--;
1181 }
1182 }
1183
1184 # ifndef STAGE1_5
1185 if (print_possibilities < 0)
1186 return 1;
1187 # endif /* ! STAGE1_5 */
1188
1189 errnum = ERR_FILE_NOT_FOUND;
1190 *rest = ch;
1191 return 0;
1192
1193 found:
1194
1195 *rest = ch;
1196 dirname = rest;
1197
1198 parent_dir_id = dir_id;
1199 parent_objectid = objectid;
1200 dir_id = de_head->deh_dir_id;
1201 objectid = de_head->deh_objectid;
1202 }
1203 }
1204
1205 int
reiserfs_embed(fsi_file_t * ffi,int * start_sector,int needed_sectors)1206 reiserfs_embed (fsi_file_t *ffi, int *start_sector, int needed_sectors)
1207 {
1208 struct reiserfs_super_block super;
1209 int num_sectors;
1210
1211 if (! devread (ffi, REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0,
1212 sizeof (struct reiserfs_super_block), (char *) &super))
1213 return 0;
1214
1215 *start_sector = 1; /* reserve first sector for stage1 */
1216 if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1217 || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1218 || substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) <= 0)
1219 && (/* check that this is not a super block copy inside
1220 * the journal log */
1221 super.s_journal_block * super.s_blocksize
1222 > REISERFS_DISK_OFFSET_IN_BYTES))
1223 num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1224 else
1225 num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1226
1227 return (needed_sectors <= num_sectors);
1228 }
1229
1230 fsi_plugin_ops_t *
fsi_init_plugin(int version,fsi_plugin_t * fp,const char ** name)1231 fsi_init_plugin(int version, fsi_plugin_t *fp, const char **name)
1232 {
1233 static fsig_plugin_ops_t ops = {
1234 FSIMAGE_PLUGIN_VERSION,
1235 .fpo_mount = reiserfs_mount,
1236 .fpo_dir = reiserfs_dir,
1237 .fpo_read = reiserfs_read
1238 };
1239
1240 *name = "reiserfs";
1241 return (fsig_init(fp, &ops));
1242 }
1243