From 57a45ced94fe48a701361d64230fc16eefa189dd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 25 Jan 2011 16:30:38 -0500 Subject: Btrfs: change reserved_extents to an atomic_t We track delayed allocation per inodes via 2 counters, one is outstanding_extents and reserved_extents. Outstanding_extents is already an atomic_t, but reserved_extents is not and is protected by a spinlock. So convert this to an atomic_t and instead of using a spinlock, use atomic_cmpxchg when releasing delalloc bytes. This makes our inode 72 bytes smaller, and reduces locking overhead (albiet it was minimal to begin with). Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b3089b5c2df..27376c97d85f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3996,6 +3996,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; u64 to_reserve; int nr_extents; + int reserved_extents; int ret; if (btrfs_transaction_in_commit(root->fs_info)) @@ -4003,25 +4004,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, root->sectorsize); - spin_lock(&BTRFS_I(inode)->accounting_lock); nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; - if (nr_extents > BTRFS_I(inode)->reserved_extents) { - nr_extents -= BTRFS_I(inode)->reserved_extents; + reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); + + if (nr_extents > reserved_extents) { + nr_extents -= reserved_extents; to_reserve = calc_trans_metadata_size(root, nr_extents); } else { nr_extents = 0; to_reserve = 0; } - spin_unlock(&BTRFS_I(inode)->accounting_lock); + to_reserve += calc_csum_metadata_size(inode, num_bytes); ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); if (ret) return ret; - spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->reserved_extents += nr_extents; + atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); atomic_inc(&BTRFS_I(inode)->outstanding_extents); - spin_unlock(&BTRFS_I(inode)->accounting_lock); block_rsv_add_bytes(block_rsv, to_reserve, 1); @@ -4036,20 +4036,30 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) struct btrfs_root *root = BTRFS_I(inode)->root; u64 to_free; int nr_extents; + int reserved_extents; num_bytes = ALIGN(num_bytes, root->sectorsize); atomic_dec(&BTRFS_I(inode)->outstanding_extents); WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); - spin_lock(&BTRFS_I(inode)->accounting_lock); - nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); - if (nr_extents < BTRFS_I(inode)->reserved_extents) { - nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; - BTRFS_I(inode)->reserved_extents -= nr_extents; - } else { - nr_extents = 0; - } - spin_unlock(&BTRFS_I(inode)->accounting_lock); + reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); + do { + int old, new; + + nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); + if (nr_extents >= reserved_extents) { + nr_extents = 0; + break; + } + old = reserved_extents; + nr_extents = reserved_extents - nr_extents; + new = reserved_extents - nr_extents; + old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, + reserved_extents, new); + if (likely(old == reserved_extents)) + break; + reserved_extents = old; + } while (1); to_free = calc_csum_metadata_size(inode, num_bytes); if (nr_extents > 0) -- cgit 1.4.1 From 66b4ffd110f9b48b8d8c1319ee446b53b8d073bf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 31 Jan 2011 16:22:42 -0500 Subject: Btrfs: handle errors in btrfs_orphan_cleanup If we cannot truncate an inode for some reason we will never delete the orphan item associated with that inode, which means that we will loop forever in btrfs_orphan_cleanup. Instead of doing this just return error so we fail to mount. It sucks, but hey it's better than hanging. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 15 ++++++++++++--- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/inode.c | 47 +++++++++++++++++++++++++++++++---------------- fs/btrfs/ioctl.c | 4 +++- fs/btrfs/relocation.c | 2 +- 6 files changed, 50 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 34142d5647df..841330f3d68d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2529,7 +2529,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); -void btrfs_orphan_cleanup(struct btrfs_root *root); +int btrfs_orphan_cleanup(struct btrfs_root *root); void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, struct btrfs_pending_snapshot *pending, u64 *bytes_to_reserve); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1aa8d607bc7..495b1ac45f8c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2058,9 +2058,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY)) { down_read(&fs_info->cleanup_work_sem); - btrfs_orphan_cleanup(fs_info->fs_root); - btrfs_orphan_cleanup(fs_info->tree_root); + err = btrfs_orphan_cleanup(fs_info->fs_root); + if (!err) + err = btrfs_orphan_cleanup(fs_info->tree_root); up_read(&fs_info->cleanup_work_sem); + if (err) { + close_ctree(tree_root); + return ERR_PTR(err); + } } return tree_root; @@ -2435,8 +2440,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) root_objectid = gang[ret - 1]->root_key.objectid + 1; for (i = 0; i < ret; i++) { + int err; + root_objectid = gang[i]->root_key.objectid; - btrfs_orphan_cleanup(gang[i]); + err = btrfs_orphan_cleanup(gang[i]); + if (err) + return err; } root_objectid++; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 27376c97d85f..a8f4e8d2ba60 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7619,7 +7619,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); BUG_ON(!reloc_root); - btrfs_orphan_cleanup(reloc_root); + ret = btrfs_orphan_cleanup(reloc_root); + BUG_ON(ret); return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d83025063ee7..0600265cb9b0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2284,7 +2284,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) * this cleans up any orphans that may be left on the list from the last use * of this root. */ -void btrfs_orphan_cleanup(struct btrfs_root *root) +int btrfs_orphan_cleanup(struct btrfs_root *root) { struct btrfs_path *path; struct extent_buffer *leaf; @@ -2294,10 +2294,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) int ret = 0, nr_unlink = 0, nr_truncate = 0; if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) - return; + return 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto out; + } path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; @@ -2306,11 +2309,8 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - printk(KERN_ERR "Error searching slot for orphan: %d" - "\n", ret); - break; - } + if (ret < 0) + goto out; /* * if ret == 0 means we found what we were searching for, which @@ -2318,6 +2318,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * find the key and see if we have stuff that matches */ if (ret > 0) { + ret = 0; if (path->slots[0] == 0) break; path->slots[0]--; @@ -2345,7 +2346,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); - BUG_ON(IS_ERR(inode)); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } /* * add this inode to the orphan list so btrfs_orphan_del does @@ -2363,7 +2367,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) */ if (is_bad_inode(inode)) { trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } btrfs_orphan_del(trans, inode); btrfs_end_transaction(trans, root); iput(inode); @@ -2378,16 +2385,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; - btrfs_truncate(inode); + ret = btrfs_truncate(inode); } else { nr_unlink++; } /* this will do delete_inode and everything for us */ iput(inode); + if (ret) + goto out; } - btrfs_free_path(path); - root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; if (root->orphan_block_rsv) @@ -2396,14 +2403,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) if (root->orphan_block_rsv || root->orphan_item_inserted) { trans = btrfs_join_transaction(root, 1); - BUG_ON(IS_ERR(trans)); - btrfs_end_transaction(trans, root); + if (!IS_ERR(trans)) + btrfs_end_transaction(trans, root); } if (nr_unlink) printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); if (nr_truncate) printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); + +out: + if (ret) + printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); + btrfs_free_path(path); + return ret; } /* @@ -4156,8 +4169,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (!IS_ERR(inode) && root != sub_root) { down_read(&root->fs_info->cleanup_work_sem); if (!(inode->i_sb->s_flags & MS_RDONLY)) - btrfs_orphan_cleanup(sub_root); + ret = btrfs_orphan_cleanup(sub_root); up_read(&root->fs_info->cleanup_work_sem); + if (ret) + inode = ERR_PTR(ret); } return inode; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5fdb2abc4fa7..ad9b8c0e930b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -409,7 +409,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, if (ret) goto fail; - btrfs_orphan_cleanup(pending_snapshot->snap); + ret = btrfs_orphan_cleanup(pending_snapshot->snap); + if (ret) + goto fail; parent = dget_parent(dentry); inode = btrfs_lookup_dentry(parent->d_inode, dentry); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 31ade5802ae8..c863c8447015 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4209,7 +4209,7 @@ out: if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); else - btrfs_orphan_cleanup(fs_root); + err = btrfs_orphan_cleanup(fs_root); } return err; } -- cgit 1.4.1 From a826d6dcb32d811b4c81df57a5ef1367516586b0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Mar 2011 13:42:43 -0400 Subject: Btrfs: check items for correctness as we search Currently if we have corrupted items things will blow up in spectacular ways. So as we read in blocks and they are leaves, check the entire leaf to make sure all of the items are correct and point to valid parts in the leaf for the item data the are responsible for. If the item is corrupt we will kick back EIO and not read any of the copies since they are likely to not be correct either. This will catch generic corruptions, it will be up to the individual callers of btrfs_search_slot to make sure their items are right. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 123 ------------------------------------------------- fs/btrfs/disk-io.c | 90 +++++++++++++++++++++++++++++++++++- fs/btrfs/extent-tree.c | 5 ++ fs/btrfs/extent_io.h | 1 + 4 files changed, 95 insertions(+), 124 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b5baff0dccfe..73e53009e126 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -732,122 +732,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset_nr(leaf, nr - 1); } -/* - * extra debugging checks to make sure all the items in a key are - * well formed and in the proper order - */ -static int check_node(struct btrfs_root *root, struct btrfs_path *path, - int level) -{ - struct extent_buffer *parent = NULL; - struct extent_buffer *node = path->nodes[level]; - struct btrfs_disk_key parent_key; - struct btrfs_disk_key node_key; - int parent_slot; - int slot; - struct btrfs_key cpukey; - u32 nritems = btrfs_header_nritems(node); - - if (path->nodes[level + 1]) - parent = path->nodes[level + 1]; - - slot = path->slots[level]; - BUG_ON(nritems == 0); - if (parent) { - parent_slot = path->slots[level + 1]; - btrfs_node_key(parent, &parent_key, parent_slot); - btrfs_node_key(node, &node_key, 0); - BUG_ON(memcmp(&parent_key, &node_key, - sizeof(struct btrfs_disk_key))); - BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_bytenr(node)); - } - BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); - if (slot != 0) { - btrfs_node_key_to_cpu(node, &cpukey, slot - 1); - btrfs_node_key(node, &node_key, slot); - BUG_ON(comp_keys(&node_key, &cpukey) <= 0); - } - if (slot < nritems - 1) { - btrfs_node_key_to_cpu(node, &cpukey, slot + 1); - btrfs_node_key(node, &node_key, slot); - BUG_ON(comp_keys(&node_key, &cpukey) >= 0); - } - return 0; -} - -/* - * extra checking to make sure all the items in a leaf are - * well formed and in the proper order - */ -static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, - int level) -{ - struct extent_buffer *leaf = path->nodes[level]; - struct extent_buffer *parent = NULL; - int parent_slot; - struct btrfs_key cpukey; - struct btrfs_disk_key parent_key; - struct btrfs_disk_key leaf_key; - int slot = path->slots[0]; - - u32 nritems = btrfs_header_nritems(leaf); - - if (path->nodes[level + 1]) - parent = path->nodes[level + 1]; - - if (nritems == 0) - return 0; - - if (parent) { - parent_slot = path->slots[level + 1]; - btrfs_node_key(parent, &parent_key, parent_slot); - btrfs_item_key(leaf, &leaf_key, 0); - - BUG_ON(memcmp(&parent_key, &leaf_key, - sizeof(struct btrfs_disk_key))); - BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_bytenr(leaf)); - } - if (slot != 0 && slot < nritems - 1) { - btrfs_item_key(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); - if (comp_keys(&leaf_key, &cpukey) <= 0) { - btrfs_print_leaf(root, leaf); - printk(KERN_CRIT "slot %d offset bad key\n", slot); - BUG_ON(1); - } - if (btrfs_item_offset_nr(leaf, slot - 1) != - btrfs_item_end_nr(leaf, slot)) { - btrfs_print_leaf(root, leaf); - printk(KERN_CRIT "slot %d offset bad\n", slot); - BUG_ON(1); - } - } - if (slot < nritems - 1) { - btrfs_item_key(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); - BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); - if (btrfs_item_offset_nr(leaf, slot) != - btrfs_item_end_nr(leaf, slot + 1)) { - btrfs_print_leaf(root, leaf); - printk(KERN_CRIT "slot %d offset bad\n", slot); - BUG_ON(1); - } - } - BUG_ON(btrfs_item_offset_nr(leaf, 0) + - btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); - return 0; -} - -static noinline int check_block(struct btrfs_root *root, - struct btrfs_path *path, int level) -{ - return 0; - if (level == 0) - return check_leaf(root, path, level); - return check_node(root, path, level); -} /* * search for key in the extent_buffer. The items start at offset p, @@ -1188,7 +1072,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, } } /* double check we haven't messed things up */ - check_block(root, path, level); if (orig_ptr != btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); @@ -1798,12 +1681,6 @@ cow_done: if (!cow) btrfs_unlock_up_safe(p, level + 1); - ret = check_block(root, p, level); - if (ret) { - ret = -1; - goto done; - } - ret = bin_search(b, key, level, &slot); if (level != 0) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 495b1ac45f8c..9f31e110b481 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -323,6 +323,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, int num_copies = 0; int mirror_num = 0; + clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; while (1) { ret = read_extent_buffer_pages(io_tree, eb, start, 1, @@ -331,6 +332,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, !verify_parent_transid(io_tree, eb, parent_transid)) return ret; + /* + * This buffer's crc is fine, but its contents are corrupted, so + * there is no reason to read the other copies, they won't be + * any less wrong. + */ + if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) + return ret; + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) @@ -419,6 +428,73 @@ static int check_tree_block_fsid(struct btrfs_root *root, return ret; } +#define CORRUPT(reason, eb, root, slot) \ + printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ + "root=%llu, slot=%d\n", reason, \ + (unsigned long long)btrfs_header_bytenr(eb), \ + (unsigned long long)root->objectid, slot) + +static noinline int check_leaf(struct btrfs_root *root, + struct extent_buffer *leaf) +{ + struct btrfs_key key; + struct btrfs_key leaf_key; + u32 nritems = btrfs_header_nritems(leaf); + int slot; + + if (nritems == 0) + return 0; + + /* Check the 0 item */ + if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != + BTRFS_LEAF_DATA_SIZE(root)) { + CORRUPT("invalid item offset size pair", leaf, root, 0); + return -EIO; + } + + /* + * Check to make sure each items keys are in the correct order and their + * offsets make sense. We only have to loop through nritems-1 because + * we check the current slot against the next slot, which verifies the + * next slot's offset+size makes sense and that the current's slot + * offset is correct. + */ + for (slot = 0; slot < nritems - 1; slot++) { + btrfs_item_key_to_cpu(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &key, slot + 1); + + /* Make sure the keys are in the right order */ + if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { + CORRUPT("bad key order", leaf, root, slot); + return -EIO; + } + + /* + * Make sure the offset and ends are right, remember that the + * item data starts at the end of the leaf and grows towards the + * front. + */ + if (btrfs_item_offset_nr(leaf, slot) != + btrfs_item_end_nr(leaf, slot + 1)) { + CORRUPT("slot offset bad", leaf, root, slot); + return -EIO; + } + + /* + * Check to make sure that we don't point outside of the leaf, + * just incase all the items are consistent to eachother, but + * all point outside of the leaf. + */ + if (btrfs_item_end_nr(leaf, slot) > + BTRFS_LEAF_DATA_SIZE(root)) { + CORRUPT("slot end outside of leaf", leaf, root, slot); + return -EIO; + } + } + + return 0; +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) { @@ -485,8 +561,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, btrfs_set_buffer_lockdep_class(eb, found_level); ret = csum_tree_block(root, eb, 1); - if (ret) + if (ret) { ret = -EIO; + goto err; + } + + /* + * If this is a leaf block and it is corrupt, set the corrupt bit so + * that we don't try and read the other copies of this block, just + * return -EIO. + */ + if (found_level == 0 && check_leaf(root, eb)) { + set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); + ret = -EIO; + } end = min_t(u64, eb->len, PAGE_CACHE_SIZE); end = eb->start + end - 1; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a8f4e8d2ba60..cd794c35a636 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4754,6 +4754,11 @@ pin: } } out: + /* + * Deleting the buffer, clear the corrupt flag since it doesn't matter + * anymore. + */ + clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); btrfs_put_block_group(cache); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9318dfefd59c..f62c5442835d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -31,6 +31,7 @@ #define EXTENT_BUFFER_UPTODATE 0 #define EXTENT_BUFFER_BLOCKING 1 #define EXTENT_BUFFER_DIRTY 2 +#define EXTENT_BUFFER_CORRUPT 3 /* these are flags for extent_clear_unlock_delalloc */ #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 -- cgit 1.4.1 From 1abe9b8a138c9988ba8f7bfded6453649a31541f Mon Sep 17 00:00:00 2001 From: liubo Date: Thu, 24 Mar 2011 11:18:59 +0000 Subject: Btrfs: add initial tracepoint support for btrfs Tracepoints can provide insight into why btrfs hits bugs and be greatly helpful for debugging, e.g dd-7822 [000] 2121.641088: btrfs_inode_request: root = 5(FS_TREE), gen = 4, ino = 256, blocks = 8, disk_i_size = 0, last_trans = 8, logged_trans = 0 dd-7822 [000] 2121.641100: btrfs_inode_new: root = 5(FS_TREE), gen = 8, ino = 257, blocks = 0, disk_i_size = 0, last_trans = 0, logged_trans = 0 btrfs-transacti-7804 [001] 2146.935420: btrfs_cow_block: root = 2(EXTENT_TREE), refs = 2, orig_buf = 29368320 (orig_level = 0), cow_buf = 29388800 (cow_level = 0) btrfs-transacti-7804 [001] 2146.935473: btrfs_cow_block: root = 1(ROOT_TREE), refs = 2, orig_buf = 29364224 (orig_level = 0), cow_buf = 29392896 (cow_level = 0) btrfs-transacti-7804 [001] 2146.972221: btrfs_transaction_commit: root = 1(ROOT_TREE), gen = 8 flush-btrfs-2-7821 [001] 2155.824210: btrfs_chunk_alloc: root = 3(CHUNK_TREE), offset = 1103101952, size = 1073741824, num_stripes = 1, sub_stripes = 0, type = DATA flush-btrfs-2-7821 [001] 2155.824241: btrfs_cow_block: root = 2(EXTENT_TREE), refs = 2, orig_buf = 29388800 (orig_level = 0), cow_buf = 29396992 (cow_level = 0) flush-btrfs-2-7821 [001] 2155.824255: btrfs_cow_block: root = 4(DEV_TREE), refs = 2, orig_buf = 29372416 (orig_level = 0), cow_buf = 29401088 (cow_level = 0) flush-btrfs-2-7821 [000] 2155.824329: btrfs_cow_block: root = 3(CHUNK_TREE), refs = 2, orig_buf = 20971520 (orig_level = 0), cow_buf = 20975616 (cow_level = 0) btrfs-endio-wri-7800 [001] 2155.898019: btrfs_cow_block: root = 5(FS_TREE), refs = 2, orig_buf = 29384704 (orig_level = 0), cow_buf = 29405184 (cow_level = 0) btrfs-endio-wri-7800 [001] 2155.898043: btrfs_cow_block: root = 7(CSUM_TREE), refs = 2, orig_buf = 29376512 (orig_level = 0), cow_buf = 29409280 (cow_level = 0) Here is what I have added: 1) ordere_extent: btrfs_ordered_extent_add btrfs_ordered_extent_remove btrfs_ordered_extent_start btrfs_ordered_extent_put These provide critical information to understand how ordered_extents are updated. 2) extent_map: btrfs_get_extent extent_map is used in both read and write cases, and it is useful for tracking how btrfs specific IO is running. 3) writepage: __extent_writepage btrfs_writepage_end_io_hook Pages are cirtical resourses and produce a lot of corner cases during writeback, so it is valuable to know how page is written to disk. 4) inode: btrfs_inode_new btrfs_inode_request btrfs_inode_evict These can show where and when a inode is created, when a inode is evicted. 5) sync: btrfs_sync_file btrfs_sync_fs These show sync arguments. 6) transaction: btrfs_transaction_commit In transaction based filesystem, it will be useful to know the generation and who does commit. 7) back reference and cow: btrfs_delayed_tree_ref btrfs_delayed_data_ref btrfs_delayed_ref_head btrfs_cow_block Btrfs natively supports back references, these tracepoints are helpful on understanding btrfs's COW mechanism. 8) chunk: btrfs_chunk_alloc btrfs_chunk_free Chunk is a link between physical offset and logical offset, and stands for space infomation in btrfs, and these are helpful on tracing space things. 9) reserved_extent: btrfs_reserved_extent_alloc btrfs_reserved_extent_free These can show how btrfs uses its space. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 + fs/btrfs/ctree.h | 1 + fs/btrfs/delayed-ref.c | 6 + fs/btrfs/extent-tree.c | 4 + fs/btrfs/extent_io.c | 2 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 12 + fs/btrfs/ordered-data.c | 8 + fs/btrfs/super.c | 5 + fs/btrfs/transaction.c | 2 + fs/btrfs/volumes.c | 16 +- fs/btrfs/volumes.h | 11 + include/trace/events/btrfs.h | 667 +++++++++++++++++++++++++++++++++++++++++++ 13 files changed, 727 insertions(+), 11 deletions(-) create mode 100644 include/trace/events/btrfs.h (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8680110f0a5a..465b5d7d6b48 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -535,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); + + trace_btrfs_cow_block(root, buf, *cow_ret); + return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0ee679b6c1b7..9d0f59142afa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "extent_io.h" #include "extent_map.h" diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index e807b143b857..bce28f653899 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&head_ref->cluster); mutex_init(&head_ref->mutex); + trace_btrfs_delayed_ref_head(ref, head_ref, action); + existing = tree_insert(&delayed_refs->root, &ref->rb_node); if (existing) { @@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, } full_ref->level = level; + trace_btrfs_delayed_tree_ref(ref, full_ref, action); + existing = tree_insert(&delayed_refs->root, &ref->rb_node); if (existing) { @@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, full_ref->objectid = owner; full_ref->offset = offset; + trace_btrfs_delayed_data_ref(ref, full_ref, action); + existing = tree_insert(&delayed_refs->root, &ref->rb_node); if (existing) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd794c35a636..86ea471d3801 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5412,6 +5412,8 @@ again: dump_space_info(sinfo, num_bytes, 1); } + trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); + return ret; } @@ -5433,6 +5435,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) update_reserved_bytes(cache, len, 0, 1); btrfs_put_block_group(cache); + trace_btrfs_reserved_extent_free(root, start, len); + return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1bbd26b4fc5c..77c65a0bea34 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2192,6 +2192,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, else write_flags = WRITE; + trace___extent_writepage(page, inode, wbc); + WARN_ON(!PageLocked(page)); pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a85b044cf39e..656bc0a892b1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1205,6 +1205,7 @@ int btrfs_sync_file(struct file *file, int datasync) int ret = 0; struct btrfs_trans_handle *trans; + trace_btrfs_sync_file(file, datasync); /* we wait first, since the writeback may change the inode */ root->log_batch++; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9813bd7d556..eaa271484199 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1787,6 +1787,8 @@ out: static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { + trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); + ClearPagePrivate2(page); return btrfs_finish_ordered_io(page->mapping->host, start, end); } @@ -3718,6 +3720,8 @@ void btrfs_evict_inode(struct inode *inode) unsigned long nr; int ret; + trace_btrfs_inode_evict(inode); + truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || root == root->fs_info->tree_root)) @@ -4510,6 +4514,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); if (dir) { + trace_btrfs_inode_request(dir); + ret = btrfs_set_inode_index(dir, index); if (ret) { iput(inode); @@ -4584,6 +4590,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, insert_inode_hash(inode); inode_tree_add(inode); + + trace_btrfs_inode_new(inode); + return inode; fail: if (dir) @@ -5261,6 +5270,9 @@ insert: } write_unlock(&em_tree->lock); out: + + trace_btrfs_get_extent(root, em); + if (path) btrfs_free_path(path); if (trans) { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 083a55477375..a1c940425307 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -202,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, INIT_LIST_HEAD(&entry->list); INIT_LIST_HEAD(&entry->root_extent_list); + trace_btrfs_ordered_extent_add(inode, entry); + spin_lock(&tree->lock); node = tree_insert(&tree->tree, file_offset, &entry->rb_node); @@ -387,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) struct list_head *cur; struct btrfs_ordered_sum *sum; + trace_btrfs_ordered_extent_put(entry->inode, entry); + if (atomic_dec_and_test(&entry->refs)) { while (!list_empty(&entry->list)) { cur = entry->list.next; @@ -420,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, spin_lock(&root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); + trace_btrfs_ordered_extent_remove(inode, entry); + /* * we have no more ordered extents for this inode and * no dirty pages. We can safely remove it from the @@ -585,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode, u64 start = entry->file_offset; u64 end = start + entry->len - 1; + trace_btrfs_ordered_extent_start(inode, entry); + /* * pages in the range can be dirty, clean or writeback. We * start IO on any dirty ones so the wait doesn't stall waiting diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d39a9895d932..2edfc039f098 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -52,6 +52,9 @@ #include "export.h" #include "compression.h" +#define CREATE_TRACE_POINTS +#include + static const struct super_operations btrfs_super_ops; static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, @@ -620,6 +623,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_root *root = btrfs_sb(sb); int ret; + trace_btrfs_sync_fs(wait); + if (!wait) { filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3d73c8d93bbb..5b4bc685bb0e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1389,6 +1389,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); put_transaction(cur_trans); + trace_btrfs_transaction_commit(root); + mutex_unlock(&root->fs_info->trans_mutex); if (current->journal_info == trans) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd13eb81ee40..8ba3c9ebff93 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -33,17 +33,6 @@ #include "volumes.h" #include "async-thread.h" -struct map_lookup { - u64 type; - int io_align; - int io_width; - int stripe_len; - int sector_size; - int num_stripes; - int sub_stripes; - struct btrfs_bio_stripe stripes[]; -}; - static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); @@ -1923,6 +1912,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, BUG_ON(ret); + trace_btrfs_chunk_free(root, map, chunk_offset, em->len); + if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); BUG_ON(ret); @@ -2650,6 +2641,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, *num_bytes = chunk_bytes_by_type(type, calc_size, map->num_stripes, sub_stripes); + trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); + em = alloc_extent_map(GFP_NOFS); if (!em) { ret = -ENOMEM; @@ -2758,6 +2751,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, item_size); BUG_ON(ret); } + kfree(chunk); return 0; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7fb59d45fe8c..7b38d0668b51 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -145,6 +145,17 @@ struct btrfs_device_info { u64 max_avail; }; +struct map_lookup { + u64 type; + int io_align; + int io_width; + int stripe_len; + int sector_size; + int num_stripes; + int sub_stripes; + struct btrfs_bio_stripe stripes[]; +}; + /* Used to sort the devices by max_avail(descending sort) */ int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h new file mode 100644 index 000000000000..f445cff66ab7 --- /dev/null +++ b/include/trace/events/btrfs.h @@ -0,0 +1,667 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM btrfs + +#if !defined(_TRACE_BTRFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BTRFS_H + +#include +#include + +struct btrfs_root; +struct btrfs_fs_info; +struct btrfs_inode; +struct extent_map; +struct btrfs_ordered_extent; +struct btrfs_delayed_ref_node; +struct btrfs_delayed_tree_ref; +struct btrfs_delayed_data_ref; +struct btrfs_delayed_ref_head; +struct map_lookup; +struct extent_buffer; + +#define show_ref_type(type) \ + __print_symbolic(type, \ + { BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \ + { BTRFS_EXTENT_DATA_REF_KEY, "EXTENT_DATA_REF" }, \ + { BTRFS_EXTENT_REF_V0_KEY, "EXTENT_REF_V0" }, \ + { BTRFS_SHARED_BLOCK_REF_KEY, "SHARED_BLOCK_REF" }, \ + { BTRFS_SHARED_DATA_REF_KEY, "SHARED_DATA_REF" }) + +#define __show_root_type(obj) \ + __print_symbolic(obj, \ + { BTRFS_ROOT_TREE_OBJECTID, "ROOT_TREE" }, \ + { BTRFS_EXTENT_TREE_OBJECTID, "EXTENT_TREE" }, \ + { BTRFS_CHUNK_TREE_OBJECTID, "CHUNK_TREE" }, \ + { BTRFS_DEV_TREE_OBJECTID, "DEV_TREE" }, \ + { BTRFS_FS_TREE_OBJECTID, "FS_TREE" }, \ + { BTRFS_ROOT_TREE_DIR_OBJECTID, "ROOT_TREE_DIR" }, \ + { BTRFS_CSUM_TREE_OBJECTID, "CSUM_TREE" }, \ + { BTRFS_TREE_LOG_OBJECTID, "TREE_LOG" }, \ + { BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \ + { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }) + +#define show_root_type(obj) \ + obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) || \ + (obj <= BTRFS_CSUM_TREE_OBJECTID )) ? __show_root_type(obj) : "-" + +TRACE_EVENT(btrfs_transaction_commit, + + TP_PROTO(struct btrfs_root *root), + + TP_ARGS(root), + + TP_STRUCT__entry( + __field( u64, generation ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->generation = root->fs_info->generation; + __entry->root_objectid = root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), gen = %llu", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->generation) +); + +DECLARE_EVENT_CLASS(btrfs__inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( blkcnt_t, blocks ) + __field( u64, disk_i_size ) + __field( u64, generation ) + __field( u64, last_trans ) + __field( u64, logged_trans ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->blocks = inode->i_blocks; + __entry->disk_i_size = BTRFS_I(inode)->disk_i_size; + __entry->generation = BTRFS_I(inode)->generation; + __entry->last_trans = BTRFS_I(inode)->last_trans; + __entry->logged_trans = BTRFS_I(inode)->logged_trans; + __entry->root_objectid = + BTRFS_I(inode)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, " + "disk_i_size = %llu, last_trans = %llu, logged_trans = %llu", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->generation, + (unsigned long)__entry->ino, + (unsigned long long)__entry->blocks, + (unsigned long long)__entry->disk_i_size, + (unsigned long long)__entry->last_trans, + (unsigned long long)__entry->logged_trans) +); + +DEFINE_EVENT(btrfs__inode, btrfs_inode_new, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(btrfs__inode, btrfs_inode_request, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +#define __show_map_type(type) \ + __print_symbolic(type, \ + { EXTENT_MAP_LAST_BYTE, "LAST_BYTE" }, \ + { EXTENT_MAP_HOLE, "HOLE" }, \ + { EXTENT_MAP_INLINE, "INLINE" }, \ + { EXTENT_MAP_DELALLOC, "DELALLOC" }) + +#define show_map_type(type) \ + type, (type >= EXTENT_MAP_LAST_BYTE) ? "-" : __show_map_type(type) + +#define show_map_flags(flag) \ + __print_flags(flag, "|", \ + { EXTENT_FLAG_PINNED, "PINNED" }, \ + { EXTENT_FLAG_COMPRESSED, "COMPRESSED" }, \ + { EXTENT_FLAG_VACANCY, "VACANCY" }, \ + { EXTENT_FLAG_PREALLOC, "PREALLOC" }) + +TRACE_EVENT(btrfs_get_extent, + + TP_PROTO(struct btrfs_root *root, struct extent_map *map), + + TP_ARGS(root, map), + + TP_STRUCT__entry( + __field( u64, root_objectid ) + __field( u64, start ) + __field( u64, len ) + __field( u64, orig_start ) + __field( u64, block_start ) + __field( u64, block_len ) + __field( unsigned long, flags ) + __field( int, refs ) + __field( unsigned int, compress_type ) + ), + + TP_fast_assign( + __entry->root_objectid = root->root_key.objectid; + __entry->start = map->start; + __entry->len = map->len; + __entry->orig_start = map->orig_start; + __entry->block_start = map->block_start; + __entry->block_len = map->block_len; + __entry->flags = map->flags; + __entry->refs = atomic_read(&map->refs); + __entry->compress_type = map->compress_type; + ), + + TP_printk("root = %llu(%s), start = %llu, len = %llu, " + "orig_start = %llu, block_start = %llu(%s), " + "block_len = %llu, flags = %s, refs = %u, " + "compress_type = %u", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->start, + (unsigned long long)__entry->len, + (unsigned long long)__entry->orig_start, + show_map_type(__entry->block_start), + (unsigned long long)__entry->block_len, + show_map_flags(__entry->flags), + __entry->refs, __entry->compress_type) +); + +#define show_ordered_flags(flags) \ + __print_symbolic(flags, \ + { BTRFS_ORDERED_IO_DONE, "IO_DONE" }, \ + { BTRFS_ORDERED_COMPLETE, "COMPLETE" }, \ + { BTRFS_ORDERED_NOCOW, "NOCOW" }, \ + { BTRFS_ORDERED_COMPRESSED, "COMPRESSED" }, \ + { BTRFS_ORDERED_PREALLOC, "PREALLOC" }, \ + { BTRFS_ORDERED_DIRECT, "DIRECT" }) + +DECLARE_EVENT_CLASS(btrfs__ordered_extent, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( u64, file_offset ) + __field( u64, start ) + __field( u64, len ) + __field( u64, disk_len ) + __field( u64, bytes_left ) + __field( unsigned long, flags ) + __field( int, compress_type ) + __field( int, refs ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->file_offset = ordered->file_offset; + __entry->start = ordered->start; + __entry->len = ordered->len; + __entry->disk_len = ordered->disk_len; + __entry->bytes_left = ordered->bytes_left; + __entry->flags = ordered->flags; + __entry->compress_type = ordered->compress_type; + __entry->refs = atomic_read(&ordered->refs); + __entry->root_objectid = + BTRFS_I(inode)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), ino = %llu, file_offset = %llu, " + "start = %llu, len = %llu, disk_len = %llu, " + "bytes_left = %llu, flags = %s, compress_type = %d, " + "refs = %d", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->ino, + (unsigned long long)__entry->file_offset, + (unsigned long long)__entry->start, + (unsigned long long)__entry->len, + (unsigned long long)__entry->disk_len, + (unsigned long long)__entry->bytes_left, + show_ordered_flags(__entry->flags), + __entry->compress_type, __entry->refs) +); + +DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_add, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered) +); + +DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_remove, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered) +); + +DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_start, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered) +); + +DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_put, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered) +); + +DECLARE_EVENT_CLASS(btrfs__writepage, + + TP_PROTO(struct page *page, struct inode *inode, + struct writeback_control *wbc), + + TP_ARGS(page, inode, wbc), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( pgoff_t, index ) + __field( long, nr_to_write ) + __field( long, pages_skipped ) + __field( loff_t, range_start ) + __field( loff_t, range_end ) + __field( char, nonblocking ) + __field( char, for_kupdate ) + __field( char, for_reclaim ) + __field( char, range_cyclic ) + __field( pgoff_t, writeback_index ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->index = page->index; + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->range_start = wbc->range_start; + __entry->range_end = wbc->range_end; + __entry->nonblocking = wbc->nonblocking; + __entry->for_kupdate = wbc->for_kupdate; + __entry->for_reclaim = wbc->for_reclaim; + __entry->range_cyclic = wbc->range_cyclic; + __entry->writeback_index = inode->i_mapping->writeback_index; + __entry->root_objectid = + BTRFS_I(inode)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, " + "nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, " + "range_end = %llu, nonblocking = %d, for_kupdate = %d, " + "for_reclaim = %d, range_cyclic = %d, writeback_index = %lu", + show_root_type(__entry->root_objectid), + (unsigned long)__entry->ino, __entry->index, + __entry->nr_to_write, __entry->pages_skipped, + __entry->range_start, __entry->range_end, + __entry->nonblocking, __entry->for_kupdate, + __entry->for_reclaim, __entry->range_cyclic, + (unsigned long)__entry->writeback_index) +); + +DEFINE_EVENT(btrfs__writepage, __extent_writepage, + + TP_PROTO(struct page *page, struct inode *inode, + struct writeback_control *wbc), + + TP_ARGS(page, inode, wbc) +); + +TRACE_EVENT(btrfs_writepage_end_io_hook, + + TP_PROTO(struct page *page, u64 start, u64 end, int uptodate), + + TP_ARGS(page, start, end, uptodate), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( pgoff_t, index ) + __field( u64, start ) + __field( u64, end ) + __field( int, uptodate ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; + __entry->start = start; + __entry->end = end; + __entry->uptodate = uptodate; + __entry->root_objectid = + BTRFS_I(page->mapping->host)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, " + "end = %llu, uptodate = %d", + show_root_type(__entry->root_objectid), + (unsigned long)__entry->ino, (unsigned long)__entry->index, + (unsigned long long)__entry->start, + (unsigned long long)__entry->end, __entry->uptodate) +); + +TRACE_EVENT(btrfs_sync_file, + + TP_PROTO(struct file *file, int datasync), + + TP_ARGS(file, datasync), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( ino_t, parent ) + __field( int, datasync ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + + __entry->ino = inode->i_ino; + __entry->parent = dentry->d_parent->d_inode->i_ino; + __entry->datasync = datasync; + __entry->root_objectid = + BTRFS_I(inode)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d", + show_root_type(__entry->root_objectid), + (unsigned long)__entry->ino, (unsigned long)__entry->parent, + __entry->datasync) +); + +TRACE_EVENT(btrfs_sync_fs, + + TP_PROTO(int wait), + + TP_ARGS(wait), + + TP_STRUCT__entry( + __field( int, wait ) + ), + + TP_fast_assign( + __entry->wait = wait; + ), + + TP_printk("wait = %d", __entry->wait) +); + +#define show_ref_action(action) \ + __print_symbolic(action, \ + { BTRFS_ADD_DELAYED_REF, "ADD_DELAYED_REF" }, \ + { BTRFS_DROP_DELAYED_REF, "DROP_DELAYED_REF" }, \ + { BTRFS_ADD_DELAYED_EXTENT, "ADD_DELAYED_EXTENT" }, \ + { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" }) + + +TRACE_EVENT(btrfs_delayed_tree_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_tree_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action), + + TP_STRUCT__entry( + __field( u64, bytenr ) + __field( u64, num_bytes ) + __field( int, action ) + __field( u64, parent ) + __field( u64, ref_root ) + __field( int, level ) + __field( int, type ) + ), + + TP_fast_assign( + __entry->bytenr = ref->bytenr; + __entry->num_bytes = ref->num_bytes; + __entry->action = action; + __entry->parent = full_ref->parent; + __entry->ref_root = full_ref->root; + __entry->level = full_ref->level; + __entry->type = ref->type; + ), + + TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, " + "parent = %llu(%s), ref_root = %llu(%s), level = %d, " + "type = %s", + (unsigned long long)__entry->bytenr, + (unsigned long long)__entry->num_bytes, + show_ref_action(__entry->action), + show_root_type(__entry->parent), + show_root_type(__entry->ref_root), + __entry->level, show_ref_type(__entry->type)) +); + +TRACE_EVENT(btrfs_delayed_data_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_data_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action), + + TP_STRUCT__entry( + __field( u64, bytenr ) + __field( u64, num_bytes ) + __field( int, action ) + __field( u64, parent ) + __field( u64, ref_root ) + __field( u64, owner ) + __field( u64, offset ) + __field( int, type ) + ), + + TP_fast_assign( + __entry->bytenr = ref->bytenr; + __entry->num_bytes = ref->num_bytes; + __entry->action = action; + __entry->parent = full_ref->parent; + __entry->ref_root = full_ref->root; + __entry->owner = full_ref->objectid; + __entry->offset = full_ref->offset; + __entry->type = ref->type; + ), + + TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, " + "parent = %llu(%s), ref_root = %llu(%s), owner = %llu, " + "offset = %llu, type = %s", + (unsigned long long)__entry->bytenr, + (unsigned long long)__entry->num_bytes, + show_ref_action(__entry->action), + show_root_type(__entry->parent), + show_root_type(__entry->ref_root), + (unsigned long long)__entry->owner, + (unsigned long long)__entry->offset, + show_ref_type(__entry->type)) +); + +TRACE_EVENT(btrfs_delayed_ref_head, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head *head_ref, + int action), + + TP_ARGS(ref, head_ref, action), + + TP_STRUCT__entry( + __field( u64, bytenr ) + __field( u64, num_bytes ) + __field( int, action ) + __field( int, is_data ) + ), + + TP_fast_assign( + __entry->bytenr = ref->bytenr; + __entry->num_bytes = ref->num_bytes; + __entry->action = action; + __entry->is_data = head_ref->is_data; + ), + + TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d", + (unsigned long long)__entry->bytenr, + (unsigned long long)__entry->num_bytes, + show_ref_action(__entry->action), + __entry->is_data) +); + +#define show_chunk_type(type) \ + __print_flags(type, "|", \ + { BTRFS_BLOCK_GROUP_DATA, "DATA" }, \ + { BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \ + { BTRFS_BLOCK_GROUP_METADATA, "METADATA"}, \ + { BTRFS_BLOCK_GROUP_RAID0, "RAID0" }, \ + { BTRFS_BLOCK_GROUP_RAID1, "RAID1" }, \ + { BTRFS_BLOCK_GROUP_DUP, "DUP" }, \ + { BTRFS_BLOCK_GROUP_RAID10, "RAID10"}) + +DECLARE_EVENT_CLASS(btrfs__chunk, + + TP_PROTO(struct btrfs_root *root, struct map_lookup *map, + u64 offset, u64 size), + + TP_ARGS(root, map, offset, size), + + TP_STRUCT__entry( + __field( int, num_stripes ) + __field( u64, type ) + __field( int, sub_stripes ) + __field( u64, offset ) + __field( u64, size ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->num_stripes = map->num_stripes; + __entry->type = map->type; + __entry->sub_stripes = map->sub_stripes; + __entry->offset = offset; + __entry->size = size; + __entry->root_objectid = root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), offset = %llu, size = %llu, " + "num_stripes = %d, sub_stripes = %d, type = %s", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->offset, + (unsigned long long)__entry->size, + __entry->num_stripes, __entry->sub_stripes, + show_chunk_type(__entry->type)) +); + +DEFINE_EVENT(btrfs__chunk, btrfs_chunk_alloc, + + TP_PROTO(struct btrfs_root *root, struct map_lookup *map, + u64 offset, u64 size), + + TP_ARGS(root, map, offset, size) +); + +DEFINE_EVENT(btrfs__chunk, btrfs_chunk_free, + + TP_PROTO(struct btrfs_root *root, struct map_lookup *map, + u64 offset, u64 size), + + TP_ARGS(root, map, offset, size) +); + +TRACE_EVENT(btrfs_cow_block, + + TP_PROTO(struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *cow), + + TP_ARGS(root, buf, cow), + + TP_STRUCT__entry( + __field( u64, root_objectid ) + __field( u64, buf_start ) + __field( int, refs ) + __field( u64, cow_start ) + __field( int, buf_level ) + __field( int, cow_level ) + ), + + TP_fast_assign( + __entry->root_objectid = root->root_key.objectid; + __entry->buf_start = buf->start; + __entry->refs = atomic_read(&buf->refs); + __entry->cow_start = cow->start; + __entry->buf_level = btrfs_header_level(buf); + __entry->cow_level = btrfs_header_level(cow); + ), + + TP_printk("root = %llu(%s), refs = %d, orig_buf = %llu " + "(orig_level = %d), cow_buf = %llu (cow_level = %d)", + show_root_type(__entry->root_objectid), + __entry->refs, + (unsigned long long)__entry->buf_start, + __entry->buf_level, + (unsigned long long)__entry->cow_start, + __entry->cow_level) +); + +DECLARE_EVENT_CLASS(btrfs__reserved_extent, + + TP_PROTO(struct btrfs_root *root, u64 start, u64 len), + + TP_ARGS(root, start, len), + + TP_STRUCT__entry( + __field( u64, root_objectid ) + __field( u64, start ) + __field( u64, len ) + ), + + TP_fast_assign( + __entry->root_objectid = root->root_key.objectid; + __entry->start = start; + __entry->len = len; + ), + + TP_printk("root = %llu(%s), start = %llu, len = %llu", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->start, + (unsigned long long)__entry->len) +); + +DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_alloc, + + TP_PROTO(struct btrfs_root *root, u64 start, u64 len), + + TP_ARGS(root, start, len) +); + +DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free, + + TP_PROTO(struct btrfs_root *root, u64 start, u64 len), + + TP_ARGS(root, start, len) +); + +#endif /* _TRACE_BTRFS_H */ + +/* This part must be outside protection */ +#include -- cgit 1.4.1 From db5b493ac78e46c7b6bad22cd25d8041564cd8ea Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Wed, 23 Mar 2011 08:14:16 +0000 Subject: Btrfs: cleanup some BUG_ON() This patch changes some BUG_ON() to the error return. (but, most callers still use BUG_ON()) Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 ++- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/extent-tree.c | 25 ++++++++++++++++++------- fs/btrfs/file-item.c | 3 ++- fs/btrfs/inode-map.c | 3 ++- fs/btrfs/ioctl.c | 5 ++++- fs/btrfs/root-tree.c | 6 ++++-- fs/btrfs/transaction.c | 12 +++++++++--- fs/btrfs/tree-log.c | 15 +++++++++------ 9 files changed, 54 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 465b5d7d6b48..4edcbe915736 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3709,7 +3709,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root unsigned long ptr; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { leaf = path->nodes[0]; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9f31e110b481..00cbb41af660 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1248,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, root, fs_info, location->objectid); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + kfree(root); + return ERR_PTR(-ENOMEM); + } ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); if (ret == 0) { l = path->nodes[0]; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 86ea471d3801..a6a8159c5d1e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5463,7 +5463,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, @@ -6457,10 +6458,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; wc = kzalloc(sizeof(*wc), GFP_NOFS); - BUG_ON(!wc); + if (!wc) { + btrfs_free_path(path); + return -ENOMEM; + } btrfs_assert_tree_locked(parent); parent_level = btrfs_header_level(parent); @@ -6918,7 +6923,11 @@ static noinline int get_new_locations(struct inode *reloc_inode, } path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + if (exts != *extents) + kfree(exts); + return -ENOMEM; + } cur_pos = extent_key->objectid - offset; last_byte = extent_key->objectid + extent_key->offset; @@ -7442,7 +7451,8 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, int ret; new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); - BUG_ON(!new_extent); + if (!new_extent) + return -ENOMEM; ref = btrfs_lookup_leaf_ref(root, leaf->start); BUG_ON(!ref); @@ -7647,7 +7657,8 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, return 0; root_item = kmalloc(sizeof(*root_item), GFP_NOFS); - BUG_ON(!root_item); + if (!root_item) + return -ENOMEM; ret = btrfs_copy_root(trans, root, root->commit_root, &eb, BTRFS_TREE_RELOC_OBJECTID); @@ -7673,7 +7684,7 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, &root_key); - BUG_ON(!reloc_root); + BUG_ON(IS_ERR(reloc_root)); reloc_root->last_trans = trans->transid; reloc_root->commit_root = NULL; reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 4f19a3e1bf32..a2134195a85e 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; file_key.objectid = objectid; file_key.offset = pos; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c56eb5909172..c05a08f4c411 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -30,7 +30,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) int slot; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; search_key.objectid = BTRFS_LAST_FREE_OBJECTID; search_key.type = -1; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ad9b8c0e930b..88d3cb2eaf75 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2350,12 +2350,15 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; struct btrfs_trans_handle *trans; u64 transid; + int ret; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) return PTR_ERR(trans); transid = trans->transid; - btrfs_commit_transaction_async(trans, root, 0); + ret = btrfs_commit_transaction_async(trans, root, 0); + if (ret) + return ret; if (argp) if (copy_to_user(argp, &transid, sizeof(transid))) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6a1086e83ffc..29b2d7c930eb 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -88,7 +88,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, search_key.offset = (u64)-1; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; @@ -332,7 +333,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5b4bc685bb0e..ce48eb59d615 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -57,7 +57,8 @@ static noinline int join_transaction(struct btrfs_root *root) if (!cur_trans) { cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); - BUG_ON(!cur_trans); + if (!cur_trans) + return -ENOMEM; root->fs_info->generation++; cur_trans->num_writers = 1; cur_trans->num_joined = 0; @@ -195,7 +196,11 @@ again: wait_current_trans(root); ret = join_transaction(root); - BUG_ON(ret); + if (ret < 0) { + if (type != TRANS_JOIN_NOLOCK) + mutex_unlock(&root->fs_info->trans_mutex); + return ERR_PTR(ret); + } cur_trans = root->fs_info->running_transaction; cur_trans->use_count++; @@ -1156,7 +1161,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; ac = kmalloc(sizeof(*ac), GFP_NOFS); - BUG_ON(!ac); + if (!ac) + return -ENOMEM; INIT_DELAYED_WORK(&ac->work, do_async_commit); ac->root = root; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 429cfcfadf90..f9425e33e358 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1828,7 +1828,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, int orig_level; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; level = btrfs_header_level(log->node); orig_level = level; @@ -3114,9 +3115,11 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) .stage = 0, }; - fs_info->log_root_recovering = 1; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + + fs_info->log_root_recovering = 1; trans = btrfs_start_transaction(fs_info->tree_root, 0); BUG_ON(IS_ERR(trans)); @@ -3124,7 +3127,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) wc.trans = trans; wc.pin = 1; - walk_log_tree(trans, log_root_tree, &wc); + ret = walk_log_tree(trans, log_root_tree, &wc); + BUG_ON(ret); again: key.objectid = BTRFS_TREE_LOG_OBJECTID; @@ -3148,8 +3152,7 @@ again: log = btrfs_read_fs_root_no_radix(log_root_tree, &found_key); - BUG_ON(!log); - + BUG_ON(IS_ERR(log)); tmp_key.objectid = found_key.offset; tmp_key.type = BTRFS_ROOT_ITEM_KEY; -- cgit 1.4.1 From 97d9a8a420444eb5b5c071d4b3b9c4100a7ae015 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 24 Mar 2011 06:33:21 +0000 Subject: Btrfs: check return value of read_tree_block() This patch is checking return value of read_tree_block(), and if it is NULL, error processing. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 +++ fs/btrfs/extent-tree.c | 6 ++++++ fs/btrfs/relocation.c | 6 ++++++ 3 files changed, 15 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4edcbe915736..84d7ca1fe0ba 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -682,6 +682,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (!cur) { cur = read_tree_block(root, blocknr, blocksize, gen); + if (!cur) + return -EIO; } else if (!uptodate) { btrfs_read_buffer(cur, gen); } @@ -4087,6 +4089,7 @@ find_next_key: } btrfs_set_path_blocking(path); cur = read_node_slot(root, cur, slot); + BUG_ON(!cur); btrfs_tree_lock(cur); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a6a8159c5d1e..5bc658a9d85c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6067,6 +6067,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, if (reada && level == 1) reada_walk_down(trans, root, wc, path); next = read_tree_block(root, bytenr, blocksize, generation); + if (!next) + return -EIO; btrfs_tree_lock(next); btrfs_set_lock_blocking(next); } @@ -7937,6 +7939,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, eb = read_tree_block(found_root, block_start, block_size, 0); + if (!eb) { + ret = -EIO; + goto out; + } btrfs_tree_lock(eb); BUG_ON(level != btrfs_header_level(eb)); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c863c8447015..58250e09eb05 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1724,6 +1724,7 @@ again: eb = read_tree_block(dest, old_bytenr, blocksize, old_ptr_gen); + BUG_ON(!eb); btrfs_tree_lock(eb); if (cow) { ret = btrfs_cow_block(trans, dest, eb, parent, @@ -2513,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans, blocksize = btrfs_level_size(root, node->level); generation = btrfs_node_ptr_generation(upper->eb, slot); eb = read_tree_block(root, bytenr, blocksize, generation); + if (!eb) { + err = -EIO; + goto next; + } btrfs_tree_lock(eb); btrfs_set_lock_blocking(eb); @@ -2670,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc, BUG_ON(block->key_ready); eb = read_tree_block(rc->extent_root, block->bytenr, block->key.objectid, block->key.offset); + BUG_ON(!eb); WARN_ON(btrfs_header_level(eb) != block->level); if (block->level == 0) btrfs_item_key_to_cpu(eb, &block->key, 0); -- cgit 1.4.1 From fc0e4a314e361af3b13d9320e92c64118f9a3e61 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 24 Mar 2011 11:41:21 +0000 Subject: btrfs: use GFP_NOFS instead of GFP_KERNEL In the filesystem context, we must allocate memory by GFP_NOFS, or we may start another filesystem operation and make kswap thread hang up. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5bc658a9d85c..7922f296420d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -471,7 +471,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, if (load_cache_only) return 0; - caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); + caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); BUG_ON(!caching_ctl); INIT_LIST_HEAD(&caching_ctl->list); @@ -1743,7 +1743,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { - blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); + blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); } static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, -- cgit 1.4.1 From b4d00d569a49fcef02195635dbf8d15904b1fb63 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 24 Mar 2011 10:24:25 +0000 Subject: Btrfs: make update_reserved_bytes() public Make the function public as we should update the reserved extents calculations after taking out an extent for trimming. Signed-off-by: Li Dongyang Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/extent-tree.c | 16 +++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8302ecd4197f..9e21176cdf57 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2157,6 +2157,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7922f296420d..0671f5b77eb8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,8 +36,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -4233,8 +4231,8 @@ int btrfs_pin_extent(struct btrfs_root *root, * update size of reserved extents. this function may return -EAGAIN * if 'reserve' is true or 'sinfo' is false. */ -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo) +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo) { int ret = 0; if (sinfo) { @@ -4714,10 +4712,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); btrfs_add_free_space(cache, buf->start, buf->len); - ret = update_reserved_bytes(cache, buf->len, 0, 0); + ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); if (ret == -EAGAIN) { /* block group became read-only */ - update_reserved_bytes(cache, buf->len, 0, 1); + btrfs_update_reserved_bytes(cache, buf->len, 0, 1); goto out; } @@ -5206,7 +5204,7 @@ checks: search_start - offset); BUG_ON(offset > search_start); - ret = update_reserved_bytes(block_group, num_bytes, 1, + ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, (data & BTRFS_BLOCK_GROUP_DATA)); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); @@ -5432,7 +5430,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) ret = btrfs_discard_extent(root, start, len); btrfs_add_free_space(cache, start, len); - update_reserved_bytes(cache, len, 0, 1); + btrfs_update_reserved_bytes(cache, len, 0, 1); btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, start, len); @@ -5634,7 +5632,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, put_caching_control(caching_ctl); } - ret = update_reserved_bytes(block_group, ins->offset, 1, 1); + ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); BUG_ON(ret); btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, -- cgit 1.4.1 From 5378e60734f5b7bfe1b43dc191aaf6131c1befe7 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 24 Mar 2011 10:24:27 +0000 Subject: Btrfs: adjust btrfs_discard_extent() return errors and trimmed bytes Callers of btrfs_discard_extent() should check if we are mounted with -o discard, as we want to make fitrim to work even the fs is not mounted with -o discard. Also we should use REQ_DISCARD to map the free extent to get a full mapping, last we only return errors if 1. the error is not a EOPNOTSUPP 2. no device supports discard Signed-off-by: Li Dongyang Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/extent-tree.c | 43 ++++++++++++++++++++++++++----------------- 3 files changed, 31 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9e21176cdf57..a18b7bc2b22c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2229,7 +2229,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end); int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes); + u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 125639ddaffe..8ecc5419d8b6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3054,7 +3054,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, break; /* opt_discard */ - ret = btrfs_error_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_error_discard_extent(root, start, + end + 1 - start, + NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0671f5b77eb8..e990d2d1ba4a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1738,39 +1738,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } -static void btrfs_issue_discard(struct block_device *bdev, +static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { - blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); + return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); } static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes) + u64 num_bytes, u64 *actual_bytes) { int ret; - u64 map_length = num_bytes; + u64 discarded_bytes = 0; struct btrfs_multi_bio *multi = NULL; - if (!btrfs_test_opt(root, DISCARD)) - return 0; /* Tell the block device(s) that the sectors can be discarded */ - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &map_length, &multi, 0); + ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, + bytenr, &num_bytes, &multi, 0); if (!ret) { struct btrfs_bio_stripe *stripe = multi->stripes; int i; - if (map_length > num_bytes) - map_length = num_bytes; for (i = 0; i < multi->num_stripes; i++, stripe++) { - btrfs_issue_discard(stripe->dev->bdev, - stripe->physical, - map_length); + ret = btrfs_issue_discard(stripe->dev->bdev, + stripe->physical, + stripe->length); + if (!ret) + discarded_bytes += stripe->length; + else if (ret != -EOPNOTSUPP) + break; } kfree(multi); } + if (discarded_bytes && ret == -EOPNOTSUPP) + ret = 0; + + if (actual_bytes) + *actual_bytes = discarded_bytes; + return ret; } @@ -4371,7 +4377,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, if (ret) break; - ret = btrfs_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_discard_extent(root, start, + end + 1 - start, NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); unpin_extent_range(root, start, end); @@ -5427,7 +5435,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return -ENOSPC; } - ret = btrfs_discard_extent(root, start, len); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_discard_extent(root, start, len, NULL); btrfs_add_free_space(cache, start, len); btrfs_update_reserved_bytes(cache, len, 0, 1); @@ -8765,7 +8774,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) } int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes) + u64 num_bytes, u64 *actual_bytes) { - return btrfs_discard_extent(root, bytenr, num_bytes); + return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); } -- cgit 1.4.1 From f7039b1d5c32241f87a513e33120db36bf30264d Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 24 Mar 2011 10:24:28 +0000 Subject: Btrfs: add btrfs_trim_fs() to handle FITRIM We take an free extent out from allocator, trim it, then put it back, but before we trim the block group, we should make sure the block group is cached, so plus a little change to make cache_block_group() run without a transaction. Signed-off-by: Li Dongyang Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 50 +++++++++++++++++++++++- fs/btrfs/free-space-cache.c | 92 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/free-space-cache.h | 2 + fs/btrfs/ioctl.c | 46 +++++++++++++++++++++++ 5 files changed, 190 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a18b7bc2b22c..93a0191aded6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2232,6 +2232,7 @@ int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e990d2d1ba4a..1efeda3b2f6f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -440,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * allocate blocks for the tree root we can't do the fast caching since * we likely hold important locks. */ - if (!trans->transaction->in_commit && + if (trans && (!trans->transaction->in_commit) && (root && root != root->fs_info->tree_root)) { spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { @@ -8778,3 +8778,51 @@ int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, { return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); } + +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *cache = NULL; + u64 group_trimmed; + u64 start; + u64 end; + u64 trimmed = 0; + int ret = 0; + + cache = btrfs_lookup_block_group(fs_info, range->start); + + while (cache) { + if (cache->key.objectid >= (range->start + range->len)) { + btrfs_put_block_group(cache); + break; + } + + start = max(range->start, cache->key.objectid); + end = min(range->start + range->len, + cache->key.objectid + cache->key.offset); + + if (end - start >= range->minlen) { + if (!block_group_cache_done(cache)) { + ret = cache_block_group(cache, NULL, root, 0); + if (!ret) + wait_block_group_cache_done(cache); + } + ret = btrfs_trim_block_group(cache, + &group_trimmed, + start, + end, + range->minlen); + + trimmed += group_trimmed; + if (ret) { + btrfs_put_block_group(cache); + break; + } + } + + cache = next_block_group(fs_info->tree_root, cache); + } + + range->len = trimmed; + return ret; +} diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f03ef97c3b21..0037427d8a9d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2178,3 +2178,95 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) cluster->block_group = NULL; } +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen) +{ + struct btrfs_free_space *entry = NULL; + struct btrfs_fs_info *fs_info = block_group->fs_info; + u64 bytes = 0; + u64 actually_trimmed; + int ret = 0; + + *trimmed = 0; + + while (start < end) { + spin_lock(&block_group->tree_lock); + + if (block_group->free_space < minlen) { + spin_unlock(&block_group->tree_lock); + break; + } + + entry = tree_search_offset(block_group, start, 0, 1); + if (!entry) + entry = tree_search_offset(block_group, + offset_to_bitmap(block_group, + start), + 1, 1); + + if (!entry || entry->offset >= end) { + spin_unlock(&block_group->tree_lock); + break; + } + + if (entry->bitmap) { + ret = search_bitmap(block_group, entry, &start, &bytes); + if (!ret) { + if (start >= end) { + spin_unlock(&block_group->tree_lock); + break; + } + bytes = min(bytes, end - start); + bitmap_clear_bits(block_group, entry, + start, bytes); + if (entry->bytes == 0) + free_bitmap(block_group, entry); + } else { + start = entry->offset + BITS_PER_BITMAP * + block_group->sectorsize; + spin_unlock(&block_group->tree_lock); + ret = 0; + continue; + } + } else { + start = entry->offset; + bytes = min(entry->bytes, end - start); + unlink_free_space(block_group, entry); + kfree(entry); + } + + spin_unlock(&block_group->tree_lock); + + if (bytes >= minlen) { + int update_ret; + update_ret = btrfs_update_reserved_bytes(block_group, + bytes, 1, 1); + + ret = btrfs_error_discard_extent(fs_info->extent_root, + start, + bytes, + &actually_trimmed); + + btrfs_add_free_space(block_group, + start, bytes); + if (!update_ret) + btrfs_update_reserved_bytes(block_group, + bytes, 0, 1); + + if (ret) + break; + *trimmed += actually_trimmed; + } + start += bytes; + bytes = 0; + + if (fatal_signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + cond_resched(); + } + + return ret; +} diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index e49ca5c321b5..65c3b935289f 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, int btrfs_return_cluster_to_free_space( struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster); +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen); #endif diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 32c980ae0f1c..649f47d2afb4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -258,6 +259,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) return put_user(inode->i_generation, arg); } +static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) +{ + struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_device *device; + struct request_queue *q; + struct fstrim_range range; + u64 minlen = ULLONG_MAX; + u64 num_devices = 0; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&fs_info->fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { + if (!device->bdev) + continue; + q = bdev_get_queue(device->bdev); + if (blk_queue_discard(q)) { + num_devices++; + minlen = min((u64)q->limits.discard_granularity, + minlen); + } + } + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + if (!num_devices) + return -EOPNOTSUPP; + + if (copy_from_user(&range, arg, sizeof(range))) + return -EFAULT; + + range.minlen = max(range.minlen, minlen); + ret = btrfs_trim_fs(root, &range); + if (ret < 0) + return ret; + + if (copy_to_user(arg, &range, sizeof(range))) + return -EFAULT; + + return 0; +} + static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, @@ -2426,6 +2470,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_setflags(file, argp); case FS_IOC_GETVERSION: return btrfs_ioctl_getversion(file, argp); + case FITRIM: + return btrfs_ioctl_fitrim(file, argp); case BTRFS_IOC_SNAP_CREATE: return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SNAP_CREATE_V2: -- cgit 1.4.1 From dac97e516c617f9c797f64b0224050b70aea30c7 Mon Sep 17 00:00:00 2001 From: Yoshinori Sano Date: Tue, 15 Feb 2011 12:01:42 +0000 Subject: Btrfs: fix uncheck memory allocations To make Btrfs code more robust, several return value checks where memory allocation can fail are introduced. I use BUG_ON where I don't know how to handle the error properly, which increases the number of using the notorious BUG_ON, though. Signed-off-by: Yoshinori Sano Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 6 ++++++ fs/btrfs/extent-tree.c | 4 ++++ fs/btrfs/inode.c | 2 ++ 3 files changed, 12 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 4d2110eafe29..992a4b92083e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -340,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); + if (!cb) + return -ENOMEM; atomic_set(&cb->pending_bios, 0); cb->errors = 0; cb->inode = inode; @@ -354,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); + if(!bio) { + kfree(cb); + return -ENOMEM; + } bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; atomic_inc(&cb->pending_bios); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1efeda3b2f6f..cd0b69f57375 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6978,6 +6978,10 @@ static noinline int get_new_locations(struct inode *reloc_inode, struct disk_extent *old = exts; max *= 2; exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); + if (!exts) { + ret = -ENOMEM; + goto out; + } memcpy(exts, old, sizeof(*exts) * nr); if (old != *extents) kfree(old); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67fd6e9552d3..f739b256967e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -290,6 +290,7 @@ static noinline int add_async_extent(struct async_cow *cow, struct async_extent *async_extent; async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); + BUG_ON(!async_extent); async_extent->start = start; async_extent->ram_size = ram_size; async_extent->compressed_size = compressed_size; @@ -388,6 +389,7 @@ again: (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); + BUG_ON(!pages); if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; -- cgit 1.4.1 From 9f7c43c96727a53bea45f7f2549d897f0a6117b8 Mon Sep 17 00:00:00 2001 From: liubo Date: Mon, 7 Mar 2011 02:13:33 +0000 Subject: Btrfs: fix memory leak of empty filesystem after balance After Josef's patch(commit 3c14874acc71180553fb5aba528e3cf57c5b958b), btrfs will exclude super bytes when reading block groups(by marking a extent state UPTODATE). However, these bytes do not get freed while balance remove unused block groups, and we won't process those removed ones any more, when we do umount and unload the btrfs module, btrfs hits a memory leak. This patch add the missing free operation. Reproduce steps: $ mkfs.btrfs disk $ mount disk /mnt/btrfs -o loop $ btrfs filesystem balance /mnt/btrfs $ umount /mnt/btrfs $ rmmod btrfs Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd0b69f57375..a561060f5ffb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8669,6 +8669,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, BUG_ON(!block_group); BUG_ON(!block_group->ro); + /* + * Free the reserved super bytes from this block group before + * remove it. + */ + free_excluded_extents(root, block_group); + memcpy(&key, &block_group->key, sizeof(key)); if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | -- cgit 1.4.1 From c59021f846881a957ac5afe456d0f59d6a517b61 Mon Sep 17 00:00:00 2001 From: liubo Date: Mon, 7 Mar 2011 02:13:14 +0000 Subject: Btrfs: fix OOPS of empty filesystem after balance btrfs will remove unused block groups after balance. When a empty filesystem is balanced, the block group with tag "DATA" may be dropped, and after umount and mount again, it will not find "DATA" space_info and lead to OOPS. So we initial the necessary space_infos(DATA, SYSTEM, METADATA) to avoid OOPS. Reported-by: Daniel J Blueman Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 6 ++++++ fs/btrfs/extent-tree.c | 23 +++++++++++++++++++++++ 3 files changed, 30 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 93a0191aded6..d47ce8307854 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2234,6 +2234,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); +int btrfs_init_space_info(struct btrfs_fs_info *fs_info); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8ecc5419d8b6..b3fc8475870f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2065,6 +2065,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + ret = btrfs_init_space_info(fs_info); + if (ret) { + printk(KERN_ERR "Failed to initial space info: %d\n", ret); + goto fail_block_groups; + } + ret = btrfs_read_block_groups(extent_root); if (ret) { printk(KERN_ERR "Failed to read block groups: %d\n", ret); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a561060f5ffb..f619c3cb13b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8778,6 +8778,29 @@ out: return ret; } +int btrfs_init_space_info(struct btrfs_fs_info *fs_info) +{ + struct btrfs_space_info *space_info; + int ret; + + ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, + &space_info); + if (ret) + return ret; + + ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, + &space_info); + if (ret) + return ret; + + ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, + &space_info); + if (ret) + return ret; + + return ret; +} + int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { return unpin_extent_range(root, start, end); -- cgit 1.4.1 From 0e4f8f888845f9dca540ad175884244e5db5eea2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Apr 2011 16:05:44 -0400 Subject: Btrfs: don't force chunk allocation in find_free_extent find_free_extent likes to allocate in contiguous clusters, which makes writeback faster, especially on SSD storage. As the FS fragments, these clusters become harder to find and we have to decide between allocating a new chunk to make more clusters or giving up on the cluster to allocate from the free space we have. Right now it creates too many chunks, and you can end up with a whole FS that is mostly empty metadata chunks. This commit changes the allocation code to be more strict and only allocate new chunks when we've made good use of the chunks we already have. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 95 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f619c3cb13b7..26479484180d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,25 @@ #include "locking.h" #include "free-space-cache.h" +/* control flags for do_chunk_alloc's force field + * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk + * if we really need one. + * + * CHUNK_ALLOC_FORCE means it must try to allocate one + * + * CHUNK_ALLOC_LIMITED means to only try and allocate one + * if we have very few chunks already allocated. This is + * used as part of the clustering code to help make sure + * we have a good pool of storage to cluster in, without + * filling the FS with empty chunks + * + */ +enum { + CHUNK_ALLOC_NO_FORCE = 0, + CHUNK_ALLOC_FORCE = 1, + CHUNK_ALLOC_LIMITED = 2, +}; + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -3019,7 +3038,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_readonly = 0; found->bytes_may_use = 0; found->full = 0; - found->force_alloc = 0; + found->force_alloc = CHUNK_ALLOC_NO_FORCE; *space_info = found; list_add_rcu(&found->list, &info->space_info); atomic_set(&found->caching_threads, 0); @@ -3150,7 +3169,7 @@ again: if (!data_sinfo->full && alloc_chunk) { u64 alloc_target; - data_sinfo->force_alloc = 1; + data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); @@ -3160,7 +3179,8 @@ alloc: ret = do_chunk_alloc(trans, root->fs_info->extent_root, bytes + 2 * 1024 * 1024, - alloc_target, 0); + alloc_target, + CHUNK_ALLOC_NO_FORCE); btrfs_end_transaction(trans, root); if (ret < 0) { if (ret != -ENOSPC) @@ -3239,31 +3259,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { if (found->flags & BTRFS_BLOCK_GROUP_METADATA) - found->force_alloc = 1; + found->force_alloc = CHUNK_ALLOC_FORCE; } rcu_read_unlock(); } static int should_alloc_chunk(struct btrfs_root *root, - struct btrfs_space_info *sinfo, u64 alloc_bytes) + struct btrfs_space_info *sinfo, u64 alloc_bytes, + int force) { u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; + u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; u64 thresh; - if (sinfo->bytes_used + sinfo->bytes_reserved + - alloc_bytes + 256 * 1024 * 1024 < num_bytes) + if (force == CHUNK_ALLOC_FORCE) + return 1; + + /* + * in limited mode, we want to have some free space up to + * about 1% of the FS size. + */ + if (force == CHUNK_ALLOC_LIMITED) { + thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + thresh = max_t(u64, 64 * 1024 * 1024, + div_factor_fine(thresh, 1)); + + if (num_bytes - num_allocated < thresh) + return 1; + } + + /* + * we have two similar checks here, one based on percentage + * and once based on a hard number of 256MB. The idea + * is that if we have a good amount of free + * room, don't allocate a chunk. A good mount is + * less than 80% utilized of the chunks we have allocated, + * or more than 256MB free + */ + if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) return 0; - if (sinfo->bytes_used + sinfo->bytes_reserved + - alloc_bytes < div_factor(num_bytes, 8)) + if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) return 0; thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + + /* 256MB or 5% of the FS */ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) return 0; - return 1; } @@ -3289,17 +3334,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, spin_lock(&space_info->lock); if (space_info->force_alloc) - force = 1; + force = space_info->force_alloc; if (space_info->full) { spin_unlock(&space_info->lock); goto out; } - if (!force && !should_alloc_chunk(extent_root, space_info, - alloc_bytes)) { + if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { spin_unlock(&space_info->lock); goto out; } + spin_unlock(&space_info->lock); /* @@ -3327,7 +3372,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->full = 1; else ret = 1; - space_info->force_alloc = 0; + space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; spin_unlock(&space_info->lock); out: mutex_unlock(&extent_root->fs_info->chunk_mutex); @@ -5303,11 +5348,13 @@ loop: if (allowed_chunk_alloc) { ret = do_chunk_alloc(trans, root, num_bytes + - 2 * 1024 * 1024, data, 1); + 2 * 1024 * 1024, data, + CHUNK_ALLOC_LIMITED); allowed_chunk_alloc = 0; done_chunk_alloc = 1; - } else if (!done_chunk_alloc) { - space_info->force_alloc = 1; + } else if (!done_chunk_alloc && + space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { + space_info->force_alloc = CHUNK_ALLOC_LIMITED; } if (loop < LOOP_NO_EMPTY_SIZE) { @@ -5393,7 +5440,8 @@ again: */ if (empty_size || root->ref_cows) ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data, 0); + num_bytes + 2 * 1024 * 1024, data, + CHUNK_ALLOC_NO_FORCE); WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, @@ -5405,7 +5453,7 @@ again: num_bytes = num_bytes & ~(root->sectorsize - 1); num_bytes = max(num_bytes, min_alloc_size); do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, data, 1); + num_bytes, data, CHUNK_ALLOC_FORCE); goto again; } if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { @@ -8109,13 +8157,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, alloc_flags = update_block_group_flags(root, cache->flags); if (alloc_flags != cache->flags) - do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); ret = set_block_group_ro(cache); if (!ret) goto out; alloc_flags = get_alloc_profile(root, cache->space_info->flags); - ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); if (ret < 0) goto out; ret = set_block_group_ro(cache); @@ -8128,7 +8178,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type) { u64 alloc_flags = get_alloc_profile(root, type); - return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); + return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, + CHUNK_ALLOC_FORCE); } /* -- cgit 1.4.1 From 6d74119f1a3efad9dc7f79a16c201242324b731f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2011 20:20:11 -0400 Subject: Btrfs: avoid taking the chunk_mutex in do_chunk_alloc Everytime we try to allocate disk space we try and see if we can pre-emptively allocate a chunk, but in the common case we don't allocate anything, so there is no sense in taking the chunk_mutex at all. So instead if we are allocating a chunk, mark it in the space_info so we don't get two people trying to allocate at the same time. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Liu Bo --- fs/btrfs/ctree.h | 4 +++- fs/btrfs/extent-tree.c | 30 +++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0d00a07b5b29..2e61fe1b6b8c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -740,8 +740,10 @@ struct btrfs_space_info { */ unsigned long reservation_progress; - int full; /* indicates that we cannot allocate any more + int full:1; /* indicates that we cannot allocate any more chunks for this space */ + int chunk_alloc:1; /* set if we are allocating a chunk */ + int force_alloc; /* set if we need to force a chunk alloc for this space */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 26479484180d..31f33ba56fe8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3039,6 +3039,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_may_use = 0; found->full = 0; found->force_alloc = CHUNK_ALLOC_NO_FORCE; + found->chunk_alloc = 0; *space_info = found; list_add_rcu(&found->list, &info->space_info); atomic_set(&found->caching_threads, 0); @@ -3318,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, { struct btrfs_space_info *space_info; struct btrfs_fs_info *fs_info = extent_root->fs_info; + int wait_for_alloc = 0; int ret = 0; - mutex_lock(&fs_info->chunk_mutex); - flags = btrfs_reduce_alloc_profile(extent_root, flags); space_info = __find_space_info(extent_root->fs_info, flags); @@ -3332,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } BUG_ON(!space_info); +again: spin_lock(&space_info->lock); if (space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { spin_unlock(&space_info->lock); - goto out; + return 0; } if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { spin_unlock(&space_info->lock); - goto out; + return 0; + } else if (space_info->chunk_alloc) { + wait_for_alloc = 1; + } else { + space_info->chunk_alloc = 1; } spin_unlock(&space_info->lock); + mutex_lock(&fs_info->chunk_mutex); + + /* + * The chunk_mutex is held throughout the entirety of a chunk + * allocation, so once we've acquired the chunk_mutex we know that the + * other guy is done and we need to recheck and see if we should + * allocate. + */ + if (wait_for_alloc) { + mutex_unlock(&fs_info->chunk_mutex); + wait_for_alloc = 0; + goto again; + } + /* * If we have mixed data/metadata chunks we want to make sure we keep * allocating mixed chunks instead of individual chunks. @@ -3372,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->full = 1; else ret = 1; + space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; + space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); -out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } -- cgit 1.4.1