d9/d8f/drivers_2filesystems_2ext2_2src_2jbd_2recovery_8c_source.html

/*

 * linux/fs/jbd/recovery.c

 *

 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999

 *

 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved

 *

 * This file is part of the Linux kernel and is made available under

 * the terms of the GNU General Public License, version 2, or at your

 * option, any later version, incorporated herein by reference.

 *

 * Journal recovery routines for the generic filesystem journaling code;

 * part of the ext2fs journaling system.

 */


#ifndef __KERNEL__

#include "jfs_user.h"

#else

#include <linux/module.h>

#include <linux/time.h>

#include <linux/fs.h>

#include <linux/jbd.h>

#include <linux/errno.h>

#include <linux/slab.h>

#endif


/*
 * Progress/statistics record shared by the recovery passes.  It is
 * zeroed by the caller, filled in by the scan pass and then consulted
 * and updated by the later passes.
 */
struct recovery_info
{
    tid_t       start_transaction;  /* first transaction ID expected in the log */
    tid_t       end_transaction;    /* ID at which the scan pass stopped */

    /* Counters reported through jbd_debug() once recovery finishes. */
    int         nr_replays, nr_revokes, nr_revoke_hits;
};


/* The three walks over the log, in the order journal_recover() runs them. */
enum passtype {
    PASS_SCAN,      /* locate the end of the valid log */
    PASS_REVOKE,    /* collect revoke records */
    PASS_REPLAY     /* copy journalled blocks back to the filesystem */
};

/* Forward declarations for helpers defined further down in this file. */
static int do_one_pass(journal_t *journal, struct recovery_info *info,
                       enum passtype pass);
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
                               tid_t sequence, struct recovery_info *info);


#ifdef __KERNEL__


/*
 * Drop our reference on each of the n buffer heads in b[], walking the
 * array from the last entry down to the first.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
    int idx;

    for (idx = n - 1; idx >= 0; idx--)
        brelse(b[idx]);
}


/*
 * Journal reads go straight to the block device layer, so nobody does
 * readahead on our behalf.  Recovery is essentially one long sequential
 * read, so we issue the IO ourselves in reasonably large chunks.
 *
 * The window size is not critical: 128K is an arbitrary, good-enough
 * fixed value.
 */

/* Number of buffer heads handed to ll_rw_block() in one submission. */
#define MAXBUF 8

/*
 * do_readahead - queue reads for the journal blocks following 'start'.
 *
 * Walks journal block offsets from 'start' up to 128K worth of blocks
 * (clamped to j_maxlen).  Buffers that are neither uptodate nor locked
 * are collected and submitted for reading MAXBUF at a time; all others
 * are released immediately.
 *
 * Returns 0 on success, the journal_bmap() error if an offset cannot be
 * mapped, or -ENOMEM if a buffer head cannot be obtained.
 */
static int do_readahead(journal_t *journal, unsigned int start)
{
    struct buffer_head *batch[MAXBUF];
    struct buffer_head *bh;
    unsigned long phys;
    unsigned int limit, off;
    unsigned int queued = 0;
    int err;

    /* Do up to 128K of readahead, never past the end of the journal. */
    limit = start + (128 * 1024 / journal->j_blocksize);
    if (limit > journal->j_maxlen)
        limit = journal->j_maxlen;

    for (off = start; off < limit; off++) {
        err = journal_bmap(journal, off, &phys);
        if (err) {
            printk (KERN_ERR "JBD: bad block at offset %u\n", off);
            goto failed;
        }

        bh = __getblk(journal->j_dev, phys, journal->j_blocksize);
        if (!bh) {
            err = -ENOMEM;
            goto failed;
        }

        /* Nothing to read if the data is present or IO is in flight. */
        if (buffer_uptodate(bh) || buffer_locked(bh)) {
            brelse(bh);
            continue;
        }

        batch[queued++] = bh;
        if (queued != MAXBUF)
            continue;

        /* Batch is full: submit it and start collecting a new one. */
        ll_rw_block(READ, queued, batch);
        journal_brelse_array(batch, queued);
        queued = 0;
    }

    /* Submit whatever is left in a partially filled batch. */
    if (queued)
        ll_rw_block(READ, queued, batch);
    err = 0;

failed:
    /* On both paths, drop our references to any still-held buffers. */
    if (queued)
        journal_brelse_array(batch, queued);
    return err;
}


#endif /* __KERNEL__ */


/*
 * jread - read one block of the journal.
 *
 * 'offset' is a block offset within the journal.  On success *bhp is
 * set to an uptodate buffer head that the caller must brelse() and 0 is
 * returned.  On failure *bhp is NULL and the result is -EIO (offset out
 * of range or read failed), -ENOMEM (no buffer head) or the error from
 * journal_bmap().
 */
static int jread(struct buffer_head **bhp, journal_t *journal,
                 unsigned int offset)
{
    struct buffer_head *buf;
    unsigned long phys;
    int rc;

    *bhp = NULL;

    if (offset >= journal->j_maxlen) {
        printk(KERN_ERR "JBD: corrupted journal superblock\n");
        return -EIO;
    }

    rc = journal_bmap(journal, offset, &phys);
    if (rc) {
        printk (KERN_ERR "JBD: bad block at offset %u\n", offset);
        return rc;
    }

    buf = __getblk(journal->j_dev, phys, journal->j_blocksize);
    if (!buf)
        return -ENOMEM;

    if (!buffer_uptodate(buf)) {
        /* A buffer with no request issued yet is brand new: kick off
         * readahead.  Otherwise assume a read is already under way. */
        if (!buffer_req(buf))
            do_readahead(journal, offset);
        wait_on_buffer(buf);
    }

    if (buffer_uptodate(buf)) {
        *bhp = buf;
        return 0;
    }

    printk (KERN_ERR "JBD: Failed to read block at offset %u\n", offset);
    brelse(buf);
    return -EIO;
}


/*

 * Count the number of in-use tags in a journal descriptor block.

 */


static int count_tags(struct buffer_head *bh, int size)

{

    char *          tagp;

    journal_block_tag_t *   tag;

    int         nr = 0;


    tagp = &bh->b_data[sizeof(journal_header_t)];


    while (((int)(tagp - bh->b_data) + (int)sizeof(journal_block_tag_t)) <= size) {

        tag = (journal_block_tag_t *) tagp;


        nr++;

        tagp += sizeof(journal_block_tag_t);

        if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))

            tagp += 16;


        if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))

            break;

    }


    return nr;

}


/*
 * The log is circular: once a block index reaches j_last, pull it back
 * by the length of the log area (j_last - j_first).
 * 'var' must be a modifiable lvalue.
 */
#define wrap(journal, var)                                          \
    do {                                                            \
        if ((var) >= (journal)->j_last)                             \
            (var) -= (journal)->j_last - (journal)->j_first;        \
    } while (0)


/*
 * journal_recover - replay the journal into the filesystem.
 *
 * If the superblock's s_start is zero no recovery is needed: the next
 * transaction ID is set to s_sequence + 1 and 0 is returned.
 *
 * Otherwise the SCAN, REVOKE and REPLAY passes are run in that order,
 * stopping at the first pass that fails.  Afterwards the journal's
 * transaction sequence is restarted past the last transaction seen,
 * the revoke table is cleared and the filesystem device is synced.
 *
 * Returns 0 on success, the error from the first failing pass, or (if
 * all passes succeeded) the error from sync_blockdev().
 */
int journal_recover(journal_t *journal)
{
    int                     err, err2;
    journal_superblock_t *  sb;
    struct recovery_info    info;

    memset(&info, 0, sizeof(info));
    sb = journal->j_superblock;

    /*
     * The journal superblock's s_start field (the current log head)
     * is always zero if, and only if, the journal was cleanly
     * unmounted.
     */
    if (!sb->s_start) {
        jbd_debug(1, "No recovery required, last transaction %d\n",
                  be32_to_cpu(sb->s_sequence));
        journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
        return 0;
    }

    err = do_one_pass(journal, &info, PASS_SCAN);
    if (!err)
        err = do_one_pass(journal, &info, PASS_REVOKE);
    if (!err)
        err = do_one_pass(journal, &info, PASS_REPLAY);

    jbd_debug(1, "JBD: recovery, exit status %d, "
              "recovered transactions %u to %u\n",
              err, info.start_transaction, info.end_transaction);
    jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
              info.nr_replays, info.nr_revoke_hits, info.nr_revokes);

    /* Restart the log at the next transaction ID, thus invalidating
     * any existing commit records in the log. */
    journal->j_transaction_sequence = ++info.end_transaction;

    journal_clear_revoke(journal);

    /* The replay pass only dirties buffers; this sync is what pushes
     * them out.  A failure here means recovery did not reach the disk,
     * so report it unless an earlier error already takes precedence. */
    err2 = sync_blockdev(journal->j_fs_dev);
    if (!err)
        err = err2;

    return err;
}


/*
 * journal_skip_recovery - discard the journal contents without replay.
 *
 * Runs only the scan pass to find the last transaction in the log, then
 * moves the journal's transaction sequence past it.  If the scan fails
 * the current sequence number is simply bumped by one.  In both cases
 * j_tail is reset to 0.
 *
 * Returns the result of the scan pass.
 */
int journal_skip_recovery(journal_t *journal)
{
    struct recovery_info    info;
    int                     err;

    memset (&info, 0, sizeof(info));

    err = do_one_pass(journal, &info, PASS_SCAN);

    if (!err) {
#ifdef CONFIG_JBD_DEBUG
        int dropped = info.end_transaction -
                      be32_to_cpu(journal->j_superblock->s_sequence);
        jbd_debug(1,
                  "JBD: ignoring %d transaction%s from the journal.\n",
                  dropped, (dropped == 1) ? "" : "s");
#endif
        journal->j_transaction_sequence = ++info.end_transaction;
    } else {
        printk(KERN_ERR "JBD: error %d scanning journal\n", err);
        ++journal->j_transaction_sequence;
    }

    journal->j_tail = 0;
    return err;
}


/*
 * do_one_pass - walk the log once, doing the work of a single pass.
 *
 * Starting from the superblock's s_start/s_sequence, journal blocks are
 * read one at a time and dispatched on their block type:
 *
 *   PASS_SCAN    only follows the log to find where it ends and stores
 *                the result in info->start_transaction/end_transaction.
 *   PASS_REVOKE  feeds every revoke block to scan_revoke_records().
 *   PASS_REPLAY  copies each non-revoked journalled block to its home
 *                location on j_fs_dev and marks it dirty.
 *
 * The walk ends at the first block with a wrong magic number, sequence
 * number or block type, or (for the later passes) on reaching
 * info->end_transaction.
 *
 * Returns 0 on success.  A failure to read a log block header, an
 * out-of-memory condition or a scan_revoke_records() error aborts the
 * pass and is returned immediately.  A failure to read a data block
 * during replay is remembered and returned once the pass has finished;
 * -EIO is returned if a later pass stops at a different transaction
 * than the scan pass did.
 */
static int do_one_pass(journal_t *journal,
                       struct recovery_info *info, enum passtype pass)
{
    unsigned int            first_commit_ID, next_commit_ID;
    unsigned long           next_log_block;
    int                     err, success = 0;
    journal_superblock_t *  sb;
    journal_header_t *      tmp;
    struct buffer_head *    bh;
    unsigned int            sequence;
    int                     blocktype;

    /*
     * First thing is to establish what we expect to find in the log
     * (in terms of transaction IDs), and where (in terms of log
     * block offsets): query the superblock.
     */

    sb = journal->j_superblock;
    next_commit_ID = be32_to_cpu(sb->s_sequence);
    next_log_block = be32_to_cpu(sb->s_start);

    first_commit_ID = next_commit_ID;
    if (pass == PASS_SCAN)
        info->start_transaction = first_commit_ID;

    jbd_debug(1, "Starting recovery pass %d\n", pass);

    /*
     * Now we walk through the log, transaction by transaction,
     * making sure that each transaction has a commit block in the
     * expected place.  Each complete transaction gets replayed back
     * into the main filesystem.
     */

    while (1) {
        int                     flags;
        char *                  tagp;
        journal_block_tag_t *   tag;
        struct buffer_head *    obh;
        struct buffer_head *    nbh;

        cond_resched();

        /* If we already know where to stop the log traversal,
         * check right now that we haven't gone past the end of
         * the log. */

        if (pass != PASS_SCAN)
            if (tid_geq(next_commit_ID, info->end_transaction))
                break;

        jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
                  next_commit_ID, next_log_block, journal->j_last);

        /* Skip over each chunk of the transaction looking for
         * either the next descriptor block or the final commit
         * record. */

        jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
        err = jread(&bh, journal, next_log_block);
        if (err)
            goto failed;

        next_log_block++;
        wrap(journal, next_log_block);

        /* What kind of buffer is it?
         *
         * If it is a descriptor block, check that it has the
         * expected sequence number.  Otherwise, we're all done
         * here. */

        tmp = (journal_header_t *)bh->b_data;

        if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
            brelse(bh);
            break;
        }

        blocktype = be32_to_cpu(tmp->h_blocktype);
        sequence = be32_to_cpu(tmp->h_sequence);
        jbd_debug(3, "Found magic %d, sequence %d\n",
                  blocktype, sequence);

        if (sequence != next_commit_ID) {
            brelse(bh);
            break;
        }

        /* OK, we have a valid descriptor block which matches
         * all of the sequence number checks.  What are we going
         * to do with it?  That depends on the pass... */

        switch (blocktype) {
        case JFS_DESCRIPTOR_BLOCK:
            /* If it is a valid descriptor block, replay it
             * in pass REPLAY; otherwise, just skip over the
             * blocks it describes. */
            if (pass != PASS_REPLAY) {
                next_log_block +=
                    count_tags(bh, journal->j_blocksize);
                wrap(journal, next_log_block);
                brelse(bh);
                continue;
            }

            /* A descriptor block: we can now write all of
             * the data blocks.  Yay, useful work is finally
             * getting done here! */

            tagp = &bh->b_data[sizeof(journal_header_t)];
            while (((int)(tagp - bh->b_data) + (int)sizeof(journal_block_tag_t))
                    <= journal->j_blocksize) {
                unsigned long io_block;

                tag = (journal_block_tag_t *) tagp;
                flags = be32_to_cpu(tag->t_flags);

                /* Each tag describes the next block in the log. */
                io_block = next_log_block++;
                wrap(journal, next_log_block);
                err = jread(&obh, journal, io_block);
                if (err) {
                    /* Recover what we can, but
                     * report failure at the end. */
                    success = err;
                    printk (KERN_ERR
                            "JBD: IO error %d recovering "
                            "block %ld in log\n",
                            err, io_block);
                } else {
                    unsigned long blocknr;

                    J_ASSERT(obh != NULL);
                    blocknr = be32_to_cpu(tag->t_blocknr);

                    /* If the block has been
                     * revoked, then we're all done
                     * here. */
                    if (journal_test_revoke
                            (journal, blocknr,
                             next_commit_ID)) {
                        brelse(obh);
                        ++info->nr_revoke_hits;
                        goto skip_write;
                    }

                    /* Find a buffer for the new
                     * data being restored */
                    nbh = __getblk(journal->j_fs_dev,
                                   blocknr,
                                   journal->j_blocksize);
                    if (nbh == NULL) {
                        printk(KERN_ERR
                               "JBD: Out of memory "
                               "during recovery.\n");
                        err = -ENOMEM;
                        brelse(bh);
                        brelse(obh);
                        goto failed;
                    }

                    lock_buffer(nbh);
                    memcpy(nbh->b_data, obh->b_data,
                           journal->j_blocksize);
                    if (flags & JFS_FLAG_ESCAPE) {
                        /* An escaped block had its leading
                         * magic number masked in the log.
                         * Put it back in the restored copy
                         * (nbh), not in the descriptor block
                         * we are still parsing. */
                        *((__be32 *)nbh->b_data) =
                            cpu_to_be32(JFS_MAGIC_NUMBER);
                    }

                    BUFFER_TRACE(nbh, "marking dirty");
                    set_buffer_uptodate(nbh);
                    mark_buffer_dirty(nbh);
                    BUFFER_TRACE(nbh, "marking uptodate");
                    ++info->nr_replays;
                    /* ll_rw_block(WRITE, 1, &nbh); */
                    unlock_buffer(nbh);
                    brelse(obh);
                    brelse(nbh);
                }

skip_write:
                /* Step to the next tag; a 16-byte UUID follows
                 * unless the tag says it is unchanged. */
                tagp += sizeof(journal_block_tag_t);
                if (!(flags & JFS_FLAG_SAME_UUID))
                    tagp += 16;

                if (flags & JFS_FLAG_LAST_TAG)
                    break;
            }

            brelse(bh);
            continue;

        case JFS_COMMIT_BLOCK:
            /* Found an expected commit block: not much to
             * do other than move on to the next sequence
             * number. */
            brelse(bh);
            next_commit_ID++;
            continue;

        case JFS_REVOKE_BLOCK:
            /* If we aren't in the REVOKE pass, then we can
             * just skip over this block. */
            if (pass != PASS_REVOKE) {
                brelse(bh);
                continue;
            }

            err = scan_revoke_records(journal, bh,
                                      next_commit_ID, info);
            brelse(bh);
            if (err)
                goto failed;
            continue;

        default:
            jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
                      blocktype);
            brelse(bh);
            goto done;
        }
    }

done:
    /*
     * We broke out of the log scan loop: either we came to the
     * known end of the log or we found an unexpected block in the
     * log.  If the latter happened, then we know that the "current"
     * transaction marks the end of the valid log.
     */

    if (pass == PASS_SCAN)
        info->end_transaction = next_commit_ID;
    else {
        /* It's really bad news if different passes end up at
         * different places (but possible due to IO errors). */
        if (info->end_transaction != next_commit_ID) {
            printk (KERN_ERR "JBD: recovery pass %d ended at "
                    "transaction %u, expected %u\n",
                    pass, next_commit_ID, info->end_transaction);
            if (!success)
                success = -EIO;
        }
    }

    return success;

failed:
    return err;
}


/*
 * scan_revoke_records - record every block listed in a revoke block.
 *
 * 'bh' holds a revoke block belonging to transaction 'sequence'.  The
 * header's r_count is used as the number of bytes in use in the block,
 * header included.  Each 32-bit big-endian block number after the
 * header is passed to journal_set_revoke() and counted in
 * info->nr_revokes.
 *
 * Returns 0 on success, -EIO if r_count is larger than the journal
 * block size, or the first error from journal_set_revoke().
 */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
                               tid_t sequence, struct recovery_info *info)
{
    journal_revoke_header_t *header;
    const int record_len = (int)sizeof(__be32);
    unsigned int rcount;
    int offset, max;

    header = (journal_revoke_header_t *) bh->b_data;
    offset = sizeof(journal_revoke_header_t);
    rcount = be32_to_cpu(header->r_count);

    /* r_count comes straight from disk.  Never let it steer reads
     * beyond the end of the block buffer. */
    if (rcount > (unsigned int)journal->j_blocksize) {
        printk(KERN_ERR "JBD: corrupted revoke block, r_count %u\n",
               rcount);
        return -EIO;
    }
    max = (int)rcount;

    /* Only consume records that fit entirely below the limit. */
    while (offset + record_len <= max) {
        unsigned long blocknr;
        int err;

        blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
        offset += record_len;
        err = journal_set_revoke(journal, blocknr, sequence);
        if (err)
            return err;
        ++info->nr_revokes;
    }
    return 0;
}

sequence
static struct recvd_message * sequence
Definition: SystemMenu.c:63

ENOMEM
#define ENOMEM
Definition: acclib.h:84

EIO
#define EIO
Definition: acclib.h:81

NULL
#define NULL
Definition: types.h:112

sb
superblock * sb
Definition: btrfs.c:4261

dropped
r dropped
Definition: btrfs.c:3014

errno.h

fs.h

time.h

__be32
__u32 __bitwise __be32
Definition: types.h:67

scan_revoke_records
static int scan_revoke_records(journal_t *, struct buffer_head *, tid_t, struct recovery_info *)
Definition: recovery.c:571

count_tags
static int count_tags(struct buffer_head *bh, int size)
Definition: recovery.c:182

journal_skip_recovery
int journal_skip_recovery(journal_t *journal)
Definition: recovery.c:282

wrap
#define wrap(journal, var)
Definition: recovery.c:207

journal_recover
int journal_recover(journal_t *journal)
Definition: recovery.c:225

jread
static int jread(struct buffer_head **bhp, journal_t *journal, unsigned int offset)
Definition: recovery.c:132

passtype
passtype
Definition: recovery.c:41

PASS_REPLAY
@ PASS_REPLAY
Definition: recovery.c:41

PASS_REVOKE
@ PASS_REVOKE
Definition: recovery.c:41

PASS_SCAN
@ PASS_SCAN
Definition: recovery.c:41

do_one_pass
static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass)
Definition: recovery.c:311

start
GLuint start
Definition: gl.h:1545

n
GLdouble n
Definition: glext.h:7729

size
GLsizeiptr size
Definition: glext.h:5919

offset
GLintptr offset
Definition: glext.h:5920

bufs
const GLenum * bufs
Definition: glext.h:6026

b
GLboolean GLboolean GLboolean b
Definition: glext.h:6204

flags
GLbitfield flags
Definition: glext.h:7161

tid_t
tid_t
Definition: ieframe.h:311

jbd.h

JFS_FLAG_LAST_TAG
#define JFS_FLAG_LAST_TAG
Definition: jbd.h:210

JFS_REVOKE_BLOCK
#define JFS_REVOKE_BLOCK
Definition: jbd.h:173

journal_block_tag_t
struct journal_block_tag_s journal_block_tag_t

JFS_DESCRIPTOR_BLOCK
#define JFS_DESCRIPTOR_BLOCK
Definition: jbd.h:169

JFS_FLAG_SAME_UUID
#define JFS_FLAG_SAME_UUID
Definition: jbd.h:208

JFS_COMMIT_BLOCK
#define JFS_COMMIT_BLOCK
Definition: jbd.h:170

JFS_MAGIC_NUMBER
#define JFS_MAGIC_NUMBER
Definition: jbd.h:159

journal_header_t
struct journal_header_s journal_header_t

tid_geq
static int tid_geq(tid_t x, tid_t y)
Definition: jbd.h:1091

JFS_FLAG_ESCAPE
#define JFS_FLAG_ESCAPE
Definition: jbd.h:207

if
if(dx< 0)
Definition: linetemp.h:194

memcpy
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878

module.h

cond_resched
static cond_resched()
Definition: module.h:437

lock_buffer
static void lock_buffer(struct buffer_head *bh)
Definition: module.h:1028

unlock_buffer
void unlock_buffer(struct buffer_head *bh)
Definition: linux.c:853

__getblk
struct buffer_head * __getblk(struct block_device *bdev, sector_t block, unsigned long size)
Definition: linux.c:799

printk
#define printk
Definition: module.h:231

wait_on_buffer
static void wait_on_buffer(struct buffer_head *bh)
Definition: module.h:1021

ll_rw_block
void ll_rw_block(int, int, struct buffer_head *bh[])
Definition: linux.c:862

be32_to_cpu
#define be32_to_cpu
Definition: module.h:157

brelse
static void brelse(struct buffer_head *bh)
Definition: module.h:955

cpu_to_be32
#define cpu_to_be32
Definition: module.h:156

sync_blockdev
int sync_blockdev(struct block_device *bdev)
Definition: linux.c:919

mark_buffer_dirty
void mark_buffer_dirty(struct buffer_head *bh)
Definition: linux.c:914

KERN_ERR
#define KERN_ERR
Definition: module.h:225

nr
ULONG nr
Definition: thread.c:7

next
static unsigned __int64 next
Definition: rand_nt.c:6

err
#define err(...)
Definition: reactos_support_code.h:30

journal_bmap
int journal_bmap(journal_t *journal, unsigned long blocknr, unsigned long *retp)
Definition: replay.c:484

journal_clear_revoke
void journal_clear_revoke(journal_t *journal)
Definition: revoke.c:689

journal_set_revoke
int journal_set_revoke(journal_t *journal, unsigned long blocknr, tid_t sequence)
Definition: revoke.c:646

journal_test_revoke
int journal_test_revoke(journal_t *journal, unsigned long blocknr, tid_t sequence)
Definition: revoke.c:670

do_readahead
static int do_readahead(mpg123_handle *fr, unsigned long newhead)
Definition: parse.c:1071

memset
#define memset(x, y, z)
Definition: compat.h:39

slab.h

buffer_head
Definition: module.h:723

buffer_head::b_data
char * b_data
Definition: module.h:735

header
Definition: winhttp_private.h:132

info
Definition: notification.c:61

journal_block_tag_s
Definition: jbd.h:190

journal_header_s
Definition: jbd.h:179

journal_header_s::h_blocktype
__be32 h_blocktype
Definition: jbd.h:181

journal_header_s::h_magic
__be32 h_magic
Definition: jbd.h:180

journal_header_s::h_sequence
__be32 h_sequence
Definition: jbd.h:182

journal_revoke_header_s
Definition: jbd.h:200

journal_superblock_s
Definition: jbd.h:217

recovery_info
Definition: recovery.c:32

recovery_info::start_transaction
tid_t start_transaction
Definition: recovery.c:33

recovery_info::nr_revokes
int nr_revokes
Definition: recovery.c:37

recovery_info::nr_revoke_hits
int nr_revoke_hits
Definition: recovery.c:38

recovery_info::end_transaction
tid_t end_transaction
Definition: recovery.c:34

recovery_info::nr_replays
int nr_replays
Definition: recovery.c:36

tag
Definition: ecma_167.h:138

max
#define max(a, b)
Definition: svc.c:63

pass
pass
Definition: typegen.h:25

READ
#define READ(_gif, _buf, _len)
Definition: ungif.c:87

success
#define success(from, fromstr, to, tostr)
Definition: win_iconv_test.c:158