Btrfs: Fix the defragmentation code and the block relocation code for data=ordered (3eaa2885) · Commits · github.com / kvm-riscv / linux

fs/btrfs/ctree.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -538,6 +538,13 @@ struct btrfs_fs_info {
		struct list_head dead_roots;
		atomic_t nr_async_submits;

		/*
		* this is used by the balancing code to wait for all the pending
		* ordered extents
		*/
		spinlock_t ordered_extent_lock;
		struct list_head ordered_extents;

		/*
		* there is a pool of worker threads for checksumming during writes
		* and a pool for checksumming after reads. This is because readers

fs/btrfs/disk-io.c

+3 −0

Original line number	Diff line number	Diff line
		@@ -1252,6 +1252,9 @@ struct btrfs_root open_ctree(struct super_block sb,
		fs_info->btree_inode->i_nlink = 1;
		fs_info->thread_pool_size = min(num_online_cpus() + 2, 8);

		INIT_LIST_HEAD(&fs_info->ordered_extents);
		spin_lock_init(&fs_info->ordered_extent_lock);

		sb->s_blocksize = 4096;
		sb->s_blocksize_bits = blksize_bits(4096);

fs/btrfs/extent-tree.c

+28 −11

Original line number	Diff line number	Diff line
		@@ -2640,6 +2640,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
		struct file_ra_state *ra;
		unsigned long total_read = 0;
		unsigned long ra_pages;
		struct btrfs_ordered_extent *ordered;
		struct btrfs_trans_handle *trans;

		ra = kzalloc(sizeof(*ra), GFP_NOFS);
		@@ -2658,9 +2659,9 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
		calc_ra(i, last_index, ra_pages));
		}
		total_read++;
		if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
		again:
		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
		goto truncate_racing;

		page = grab_cache_page(inode->i_mapping, i);
		if (!page) {
		goto out_unlock;
		@@ -2674,18 +2675,24 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
		goto out_unlock;
		}
		}
		#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
		ClearPageDirty(page);
		#else
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
		#endif
		wait_on_page_writeback(page);
		set_page_extent_mapped(page);

		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;

		lock_extent(io_tree, page_start, page_end, GFP_NOFS);

		ordered = btrfs_lookup_ordered_extent(inode, page_start);
		if (ordered) {
		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
		}
		set_page_extent_mapped(page);


		set_extent_delalloc(io_tree, page_start,
		page_end, GFP_NOFS);
		set_page_dirty(page);
		@@ -2694,10 +2701,18 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
		unlock_page(page);
		page_cache_release(page);
		}
		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
		total_read);

		out_unlock:
		/* we have to start the IO in order to get the ordered extents
		* instantiated. This allows the relocation code to wait
		* for all the ordered extents to hit the disk.
		*
		* Otherwise, it would constantly loop over the same extents
		* because the old ones don't get deleted until the IO is
		* started
		*/
		btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
		WB_SYNC_NONE);
		kfree(ra);
		trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
		if (trans) {
		@@ -3238,6 +3253,8 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)

		btrfs_clean_old_snapshots(tree_root);

		btrfs_wait_ordered_extents(tree_root);

		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);
		mutex_lock(&root->fs_info->alloc_mutex);

fs/btrfs/ioctl.c

+14 −7

Original line number	Diff line number	Diff line
		@@ -213,6 +213,7 @@ int btrfs_defrag_file(struct file *file)
		struct inode *inode = fdentry(file)->d_inode;
		struct btrfs_root *root = BTRFS_I(inode)->root;
		struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
		struct btrfs_ordered_extent *ordered;
		struct page *page;
		unsigned long last_index;
		unsigned long ra_pages = root->fs_info->bdi.ra_pages;
		@@ -234,6 +235,7 @@ int btrfs_defrag_file(struct file *file)
		min(last_index, i + ra_pages - 1));
		}
		total_read++;
		again:
		page = grab_cache_page(inode->i_mapping, i);
		if (!page)
		goto out_unlock;
		@@ -247,18 +249,23 @@ int btrfs_defrag_file(struct file *file)
		}
		}

		#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
		ClearPageDirty(page);
		#else
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
		#endif
		wait_on_page_writeback(page);
		set_page_extent_mapped(page);

		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;

		lock_extent(io_tree, page_start, page_end, GFP_NOFS);

		ordered = btrfs_lookup_ordered_extent(inode, page_start);
		if (ordered) {
		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
		}
		set_page_extent_mapped(page);

		set_extent_delalloc(io_tree, page_start,
		page_end, GFP_NOFS);

fs/btrfs/ordered-data.c

+54 −2

Original line number	Diff line number	Diff line
		@@ -167,20 +167,28 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
		entry->file_offset = file_offset;
		entry->start = start;
		entry->len = len;
		entry->inode = inode;

		/* one ref for the tree */
		atomic_set(&entry->refs, 1);
		init_waitqueue_head(&entry->wait);
		INIT_LIST_HEAD(&entry->list);
		INIT_LIST_HEAD(&entry->root_extent_list);

		node = tree_insert(&tree->tree, file_offset,
		&entry->rb_node);
		if (node) {
		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		atomic_inc(&entry->refs);
		printk("warning dup entry from add_ordered_extent\n");
		BUG();
		}
		set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
		entry_end(entry) - 1, GFP_NOFS);

		spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
		list_add_tail(&entry->root_extent_list,
		&BTRFS_I(inode)->root->fs_info->ordered_extents);
		spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);

		mutex_unlock(&tree->mutex);
		BUG_ON(node);
		return 0;
		@@ -285,11 +293,55 @@ int btrfs_remove_ordered_extent(struct inode *inode,
		rb_erase(node, &tree->tree);
		tree->last = NULL;
		set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);

		spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
		list_del_init(&entry->root_extent_list);
		spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);

		mutex_unlock(&tree->mutex);
		wake_up(&entry->wait);
		return 0;
		}

		int btrfs_wait_ordered_extents(struct btrfs_root *root)
		{
		struct list_head splice;
		struct list_head *cur;
		struct btrfs_ordered_extent *ordered;
		struct inode *inode;

		INIT_LIST_HEAD(&splice);

		spin_lock(&root->fs_info->ordered_extent_lock);
		list_splice_init(&root->fs_info->ordered_extents, &splice);
		while(!list_empty(&splice)) {
		cur = splice.next;
		ordered = list_entry(cur, struct btrfs_ordered_extent,
		root_extent_list);
		list_del_init(&ordered->root_extent_list);
		atomic_inc(&ordered->refs);
		inode = ordered->inode;

		/*
		* the inode can't go away until all the pages are gone
		* and the pages won't go away while there is still
		* an ordered extent and the ordered extent won't go
		* away until it is off this list. So, we can safely
		* increment i_count here and call iput later
		*/
		atomic_inc(&inode->i_count);
		spin_unlock(&root->fs_info->ordered_extent_lock);

		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		iput(inode);

		spin_lock(&root->fs_info->ordered_extent_lock);
		}
		spin_unlock(&root->fs_info->ordered_extent_lock);
		return 0;
		}

		/*
		* Used to start IO or wait for a given ordered extent to finish.
		*