ReactOS  0.4.14-dev-554-g2f8d847
recovery.c
Go to the documentation of this file.
1 /*
2  * linux/fs/jbd/recovery.c
3  *
4  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5  *
6  * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
7  *
8  * This file is part of the Linux kernel and is made available under
9  * the terms of the GNU General Public License, version 2, or at your
10  * option, any later version, incorporated herein by reference.
11  *
12  * Journal recovery routines for the generic filesystem journaling code;
13  * part of the ext2fs journaling system.
14  */
15 
16 #ifndef __KERNEL__
17 #include "jfs_user.h"
18 #else
19 #include <linux/module.h>
20 #include <linux/time.h>
21 #include <linux/fs.h>
22 #include <linux/jbd.h>
23 #include <linux/errno.h>
24 #include <linux/slab.h>
25 #endif
26 
27 /*
28  * Maintain information about the progress of the recovery job, so that
29  * the different passes can carry information between them.
30  */
32 {
35 
39 };
40 
42 static int do_one_pass(journal_t *journal,
43  struct recovery_info *info, enum passtype pass);
44 static int scan_revoke_records(journal_t *, struct buffer_head *,
45  tid_t, struct recovery_info *);
46 
47 #ifdef __KERNEL__
48 
/*
 * Drop the references held on the first n entries of a readahead batch.
 * Entries are released from the highest index down to index 0.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int i;

	for (i = n - 1; i >= 0; i--)
		brelse(b[i]);
}
55 
56 
57 /*
58  * When reading from the journal, we are going through the block device
59  * layer directly and so there is no readahead being done for us. We
60  * need to implement any readahead ourselves if we want it to happen at
61  * all. Recovery is basically one long sequential read, so make sure we
62  * do the IO in reasonably large chunks.
63  *
64  * This is not so critical that we need to be enormously clever about
65  * the readahead size, though. 128K is a purely arbitrary, good-enough
66  * fixed value.
67  */
68 
69 #define MAXBUF 8
70 static int do_readahead(journal_t *journal, unsigned int start)
71 {
72  int err;
73  unsigned int max, nbufs, next;
74  unsigned long blocknr;
75  struct buffer_head *bh;
76 
77  struct buffer_head * bufs[MAXBUF];
78 
79  /* Do up to 128K of readahead */
80  max = start + (128 * 1024 / journal->j_blocksize);
81  if (max > journal->j_maxlen)
82  max = journal->j_maxlen;
83 
84  /* Do the readahead itself. We'll submit MAXBUF buffer_heads at
85  * a time to the block device IO layer. */
86 
87  nbufs = 0;
88 
89  for (next = start; next < max; next++) {
90  err = journal_bmap(journal, next, &blocknr);
91 
92  if (err) {
93  printk (KERN_ERR "JBD: bad block at offset %u\n",
94  next);
95  goto failed;
96  }
97 
98  bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
99  if (!bh) {
100  err = -ENOMEM;
101  goto failed;
102  }
103 
104  if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
105  bufs[nbufs++] = bh;
106  if (nbufs == MAXBUF) {
107  ll_rw_block(READ, nbufs, bufs);
108  journal_brelse_array(bufs, nbufs);
109  nbufs = 0;
110  }
111  } else
112  brelse(bh);
113  }
114 
115  if (nbufs)
116  ll_rw_block(READ, nbufs, bufs);
117  err = 0;
118 
119 failed:
120  if (nbufs)
121  journal_brelse_array(bufs, nbufs);
122  return err;
123 }
124 
125 #endif /* __KERNEL__ */
126 
127 
128 /*
129  * Read a block from the journal
130  */
131 
132 static int jread(struct buffer_head **bhp, journal_t *journal,
133  unsigned int offset)
134 {
135  int err;
136  unsigned long blocknr;
137  struct buffer_head *bh;
138 
139  *bhp = NULL;
140 
141  if (offset >= journal->j_maxlen) {
142  printk(KERN_ERR "JBD: corrupted journal superblock\n");
143  return -EIO;
144  }
145 
146  err = journal_bmap(journal, offset, &blocknr);
147 
148  if (err) {
149  printk (KERN_ERR "JBD: bad block at offset %u\n",
150  offset);
151  return err;
152  }
153 
154  bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
155  if (!bh)
156  return -ENOMEM;
157 
158  if (!buffer_uptodate(bh)) {
159  /* If this is a brand new buffer, start readahead.
160  Otherwise, we assume we are already reading it. */
161  if (!buffer_req(bh))
162  do_readahead(journal, offset);
163  wait_on_buffer(bh);
164  }
165 
166  if (!buffer_uptodate(bh)) {
167  printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
168  offset);
169  brelse(bh);
170  return -EIO;
171  }
172 
173  *bhp = bh;
174  return 0;
175 }
176 
177 
178 /*
179  * Count the number of in-use tags in a journal descriptor block.
180  */
181 
182 static int count_tags(struct buffer_head *bh, int size)
183 {
184  char * tagp;
186  int nr = 0;
187 
188  tagp = &bh->b_data[sizeof(journal_header_t)];
189 
190  while (((int)(tagp - bh->b_data) + (int)sizeof(journal_block_tag_t)) <= size) {
191  tag = (journal_block_tag_t *) tagp;
192 
193  nr++;
194  tagp += sizeof(journal_block_tag_t);
195  if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
196  tagp += 16;
197 
198  if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
199  break;
200  }
201 
202  return nr;
203 }
204 
205 
/*
 * Make sure we wrap around the log correctly!
 *
 * If var has reached or passed (journal)->j_last, pull it back by the
 * length of the log area (j_last - j_first).  var must be a modifiable
 * lvalue; both arguments are evaluated more than once.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
212 
225 int journal_recover(journal_t *journal)
226 {
227  int err;
229 
230  struct recovery_info info;
231 
232  memset(&info, 0, sizeof(info));
233  sb = journal->j_superblock;
234 
235  /*
236  * The journal superblock's s_start field (the current log head)
237  * is always zero if, and only if, the journal was cleanly
238  * unmounted.
239  */
240 
241  if (!sb->s_start) {
242  jbd_debug(1, "No recovery required, last transaction %d\n",
243  be32_to_cpu(sb->s_sequence));
244  journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
245  return 0;
246  }
247 
248  err = do_one_pass(journal, &info, PASS_SCAN);
249  if (!err)
250  err = do_one_pass(journal, &info, PASS_REVOKE);
251  if (!err)
252  err = do_one_pass(journal, &info, PASS_REPLAY);
253 
254  jbd_debug(1, "JBD: recovery, exit status %d, "
255  "recovered transactions %u to %u\n",
256  err, info.start_transaction, info.end_transaction);
257  jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
258  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
259 
260  /* Restart the log at the next transaction ID, thus invalidating
261  * any existing commit records in the log. */
262  journal->j_transaction_sequence = ++info.end_transaction;
263 
264  journal_clear_revoke(journal);
265  sync_blockdev(journal->j_fs_dev);
266  return err;
267 }
268 
282 int journal_skip_recovery(journal_t *journal)
283 {
284  int err;
286 
287  struct recovery_info info;
288 
289  memset (&info, 0, sizeof(info));
290  sb = journal->j_superblock;
291 
292  err = do_one_pass(journal, &info, PASS_SCAN);
293 
294  if (err) {
295  printk(KERN_ERR "JBD: error %d scanning journal\n", err);
296  ++journal->j_transaction_sequence;
297  } else {
298 #ifdef CONFIG_JBD_DEBUG
299  int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
300  jbd_debug(1,
301  "JBD: ignoring %d transaction%s from the journal.\n",
302  dropped, (dropped == 1) ? "" : "s");
303 #endif
304  journal->j_transaction_sequence = ++info.end_transaction;
305  }
306 
307  journal->j_tail = 0;
308  return err;
309 }
310 
311 static int do_one_pass(journal_t *journal,
312  struct recovery_info *info, enum passtype pass)
313 {
314  unsigned int first_commit_ID, next_commit_ID;
315  unsigned long next_log_block;
316  int err, success = 0;
318  journal_header_t * tmp;
319  struct buffer_head * bh;
320  unsigned int sequence;
321  int blocktype;
322 
323  /* Precompute the maximum metadata descriptors in a descriptor block */
324  int MAX_BLOCKS_PER_DESC;
325  MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
326  / sizeof(journal_block_tag_t));
327 
328  /*
329  * First thing is to establish what we expect to find in the log
330  * (in terms of transaction IDs), and where (in terms of log
331  * block offsets): query the superblock.
332  */
333 
334  sb = journal->j_superblock;
335  next_commit_ID = be32_to_cpu(sb->s_sequence);
336  next_log_block = be32_to_cpu(sb->s_start);
337 
338  first_commit_ID = next_commit_ID;
339  if (pass == PASS_SCAN)
340  info->start_transaction = first_commit_ID;
341 
342  jbd_debug(1, "Starting recovery pass %d\n", pass);
343 
344  /*
345  * Now we walk through the log, transaction by transaction,
346  * making sure that each transaction has a commit block in the
347  * expected place. Each complete transaction gets replayed back
348  * into the main filesystem.
349  */
350 
351  while (1) {
352  int flags;
353  char * tagp;
355  struct buffer_head * obh;
356  struct buffer_head * nbh;
357 
358  cond_resched();
359 
360  /* If we already know where to stop the log traversal,
361  * check right now that we haven't gone past the end of
362  * the log. */
363 
364  if (pass != PASS_SCAN)
365  if (tid_geq(next_commit_ID, info->end_transaction))
366  break;
367 
368  jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
369  next_commit_ID, next_log_block, journal->j_last);
370 
371  /* Skip over each chunk of the transaction looking
372  * either the next descriptor block or the final commit
373  * record. */
374 
375  jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
376  err = jread(&bh, journal, next_log_block);
377  if (err)
378  goto failed;
379 
380  next_log_block++;
381  wrap(journal, next_log_block);
382 
383  /* What kind of buffer is it?
384  *
385  * If it is a descriptor block, check that it has the
386  * expected sequence number. Otherwise, we're all done
387  * here. */
388 
389  tmp = (journal_header_t *)bh->b_data;
390 
392  brelse(bh);
393  break;
394  }
395 
396  blocktype = be32_to_cpu(tmp->h_blocktype);
398  jbd_debug(3, "Found magic %d, sequence %d\n",
399  blocktype, sequence);
400 
401  if (sequence != next_commit_ID) {
402  brelse(bh);
403  break;
404  }
405 
406  /* OK, we have a valid descriptor block which matches
407  * all of the sequence number checks. What are we going
408  * to do with it? That depends on the pass... */
409 
410  switch (blocktype) {
412  /* If it is a valid descriptor block, replay it
413  * in pass REPLAY; otherwise, just skip over the
414  * blocks it describes. */
415  if (pass != PASS_REPLAY) {
416  next_log_block +=
417  count_tags(bh, journal->j_blocksize);
418  wrap(journal, next_log_block);
419  brelse(bh);
420  continue;
421  }
422 
423  /* A descriptor block: we can now write all of
424  * the data blocks. Yay, useful work is finally
425  * getting done here! */
426 
427  tagp = &bh->b_data[sizeof(journal_header_t)];
428  while (((int)(tagp - bh->b_data) + (int)sizeof(journal_block_tag_t))
429  <= journal->j_blocksize) {
430  unsigned long io_block;
431 
432  tag = (journal_block_tag_t *) tagp;
433  flags = be32_to_cpu(tag->t_flags);
434 
435  io_block = next_log_block++;
436  wrap(journal, next_log_block);
437  err = jread(&obh, journal, io_block);
438  if (err) {
439  /* Recover what we can, but
440  * report failure at the end. */
441  success = err;
443  "JBD: IO error %d recovering "
444  "block %ld in log\n",
445  err, io_block);
446  } else {
447  unsigned long blocknr;
448 
449  J_ASSERT(obh != NULL);
450  blocknr = be32_to_cpu(tag->t_blocknr);
451 
452  /* If the block has been
453  * revoked, then we're all done
454  * here. */
456  (journal, blocknr,
457  next_commit_ID)) {
458  brelse(obh);
459  ++info->nr_revoke_hits;
460  goto skip_write;
461  }
462 
463  /* Find a buffer for the new
464  * data being restored */
465  nbh = __getblk(journal->j_fs_dev,
466  blocknr,
467  journal->j_blocksize);
468  if (nbh == NULL) {
470  "JBD: Out of memory "
471  "during recovery.\n");
472  err = -ENOMEM;
473  brelse(bh);
474  brelse(obh);
475  goto failed;
476  }
477 
478  lock_buffer(nbh);
479  memcpy(nbh->b_data, obh->b_data,
480  journal->j_blocksize);
481  if (flags & JFS_FLAG_ESCAPE) {
482  *((__be32 *)bh->b_data) =
484  }
485 
486  BUFFER_TRACE(nbh, "marking dirty");
487  set_buffer_uptodate(nbh);
488  mark_buffer_dirty(nbh);
489  BUFFER_TRACE(nbh, "marking uptodate");
490  ++info->nr_replays;
491  /* ll_rw_block(WRITE, 1, &nbh); */
492  unlock_buffer(nbh);
493  brelse(obh);
494  brelse(nbh);
495  }
496 
497 skip_write:
498  tagp += sizeof(journal_block_tag_t);
499  if (!(flags & JFS_FLAG_SAME_UUID))
500  tagp += 16;
501 
502  if (flags & JFS_FLAG_LAST_TAG)
503  break;
504  }
505 
506  brelse(bh);
507  continue;
508 
509  case JFS_COMMIT_BLOCK:
510  /* Found an expected commit block: not much to
511  * do other than move on to the next sequence
512  * number. */
513  brelse(bh);
514  next_commit_ID++;
515  continue;
516 
517  case JFS_REVOKE_BLOCK:
518  /* If we aren't in the REVOKE pass, then we can
519  * just skip over this block. */
520  if (pass != PASS_REVOKE) {
521  brelse(bh);
522  continue;
523  }
524 
525  err = scan_revoke_records(journal, bh,
526  next_commit_ID, info);
527  brelse(bh);
528  if (err)
529  goto failed;
530  continue;
531 
532  default:
533  jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
534  blocktype);
535  brelse(bh);
536  goto done;
537  }
538  }
539 
540 done:
541  /*
542  * We broke out of the log scan loop: either we came to the
543  * known end of the log or we found an unexpected block in the
544  * log. If the latter happened, then we know that the "current"
545  * transaction marks the end of the valid log.
546  */
547 
548  if (pass == PASS_SCAN)
549  info->end_transaction = next_commit_ID;
550  else {
551  /* It's really bad news if different passes end up at
552  * different places (but possible due to IO errors). */
553  if (info->end_transaction != next_commit_ID) {
554  printk (KERN_ERR "JBD: recovery pass %d ended at "
555  "transaction %u, expected %u\n",
556  pass, next_commit_ID, info->end_transaction);
557  if (!success)
558  success = -EIO;
559  }
560  }
561 
562  return success;
563 
564 failed:
565  return err;
566 }
567 
568 
569 /* Scan a revoke record, marking all blocks mentioned as revoked. */
570 
571 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
572  tid_t sequence, struct recovery_info *info)
573 {
575  int offset, max;
576 
579  max = be32_to_cpu(header->r_count);
580 
581  while (offset < max) {
582  unsigned long blocknr;
583  int err;
584 
585  blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
586  offset += 4;
587  err = journal_set_revoke(journal, blocknr, sequence);
588  if (err)
589  return err;
590  ++info->nr_revokes;
591  }
592  return 0;
593 }
#define be32_to_cpu
Definition: module.h:155
tid_t end_transaction
Definition: recovery.c:34
#define max(a, b)
Definition: svc.c:63
#define cpu_to_be32
Definition: module.h:154
#define JFS_FLAG_SAME_UUID
Definition: jbd.h:208
int journal_test_revoke(journal_t *journal, unsigned long blocknr, tid_t sequence)
Definition: revoke.c:670
#define JFS_FLAG_ESCAPE
Definition: jbd.h:207
void unlock_buffer(struct buffer_head *bh)
Definition: linux.c:845
const GLenum * bufs
Definition: glext.h:6026
ULONG nr
Definition: thread.c:7
struct journal_header_s journal_header_t
superblock * sb
Definition: btrfs.c:4137
tid_t
Definition: ieframe.h:311
__be32 h_sequence
Definition: jbd.h:182
__be32 h_blocktype
Definition: jbd.h:181
GLintptr offset
Definition: glext.h:5920
static int cond_resched()
Definition: module.h:432
GLdouble n
Definition: glext.h:7729
tid_t start_transaction
Definition: recovery.c:33
Definition: ecma_167.h:138
void ll_rw_block(int, int, struct buffer_head *bh[])
Definition: linux.c:854
int nr_revoke_hits
Definition: recovery.c:38
Definition: arc.h:48
struct buffer_head * __getblk(struct block_device *bdev, sector_t block, unsigned long size)
Definition: linux.c:791
static void brelse(struct buffer_head *bh)
Definition: module.h:945
void journal_clear_revoke(journal_t *journal)
Definition: revoke.c:689
pass
Definition: typegen.h:24
#define READ(_gif, _buf, _len)
Definition: ungif.c:107
static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass)
Definition: recovery.c:311
__be32 h_magic
Definition: jbd.h:180
static void wait_on_buffer(struct buffer_head *bh)
Definition: module.h:1011
#define JFS_MAGIC_NUMBER
Definition: jbd.h:159
#define JFS_COMMIT_BLOCK
Definition: jbd.h:170
smooth NULL
Definition: ftsmooth.c:416
#define JFS_REVOKE_BLOCK
Definition: jbd.h:173
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
asmlinkage int printk(const char *fmt,...)
Definition: output.c:76
GLsizeiptr size
Definition: glext.h:5919
#define JFS_DESCRIPTOR_BLOCK
Definition: jbd.h:169
if(!(yy_init))
Definition: macro.lex.yy.c:714
static int tid_geq(tid_t x, tid_t y)
Definition: jbd.h:1091
#define KERN_ERR
Definition: module.h:223
#define success(from, fromstr, to, tostr)
#define JFS_FLAG_LAST_TAG
Definition: jbd.h:210
GLbitfield flags
Definition: glext.h:7161
static int count_tags(struct buffer_head *bh, int size)
Definition: recovery.c:182
int journal_recover(journal_t *journal)
Definition: recovery.c:225
static struct message * sequence
Definition: subclass.c:48
static int scan_revoke_records(journal_t *, struct buffer_head *, tid_t, struct recovery_info *)
Definition: recovery.c:571
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
int journal_bmap(journal_t *journal, unsigned long blocknr, unsigned long *retp)
Definition: replay.c:484
#define err(...)
void mark_buffer_dirty(struct buffer_head *bh)
Definition: linux.c:906
static int do_readahead(mpg123_handle *fr, unsigned long newhead)
Definition: parse.c:993
char * b_data
Definition: module.h:725
static unsigned __int64 next
Definition: rand_nt.c:6
static int jread(struct buffer_head **bhp, journal_t *journal, unsigned int offset)
Definition: recovery.c:132
__u32 __bitwise __be32
Definition: types.h:63
GLuint start
Definition: gl.h:1545
static void lock_buffer(struct buffer_head *bh)
Definition: module.h:1018
Definition: arc.h:40
int nr_replays
Definition: recovery.c:36
int journal_skip_recovery(journal_t *journal)
Definition: recovery.c:282
passtype
Definition: recovery.c:41
struct journal_block_tag_s journal_block_tag_t
int sync_blockdev(struct block_device *bdev)
Definition: linux.c:911
#define wrap(journal, var)
Definition: recovery.c:207
int nr_revokes
Definition: recovery.c:37
#define memset(x, y, z)
Definition: compat.h:39
struct CFHEADER header
Definition: fdi.c:101
int journal_set_revoke(journal_t *journal, unsigned long blocknr, tid_t sequence)
Definition: revoke.c:646
char * tag
Definition: main.c:59