diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7923336ecf94..24518b57733e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2043,11 +2043,12 @@ static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) return !buffer_mapped(bh) || buffer_delay(bh); } -/* FIXME!! only support data=writeback mode */ /* * get called vi ext4_da_writepages after taking page lock * We may end up doing block allocation here in case * mpage_da_map_blocks failed to allocate blocks. + * + * We also get called via journal_submit_inode_data_buffers */ static int ext4_da_writepage(struct page *page, struct writeback_control *wbc) @@ -2066,6 +2067,7 @@ static int ext4_da_writepage(struct page *page, * ext4_da_writepages() but directly (shrink_page_list). * We cannot easily start a transaction here so we just skip * writing the page in case we would have to do so. + * We reach here also via journal_submit_inode_data_buffers */ size = i_size_read(inode); @@ -2081,8 +2083,11 @@ static int ext4_da_writepage(struct page *page, * We can't do block allocation under * page lock without a handle . So redirty * the page and return + * We may reach here when we do a journal commit + * via journal_submit_inode_data_buffers. + * If we don't have mapping block we just ignore + * them */ - BUG_ON(wbc->sync_mode != WB_SYNC_NONE); redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; @@ -2097,7 +2102,6 @@ static int ext4_da_writepage(struct page *page, return ret; } - /* * For now just follow the DIO way to estimate the max credits * needed to write out EXT4_MAX_WRITEBACK_PAGES. @@ -2130,7 +2134,7 @@ static int ext4_da_writepages(struct address_space *mapping, return 0; /* - * Estimate the worse case needed credits to write out + * Estimate the worse case needed credits to write out * EXT4_MAX_BUF_BLOCKS pages */ needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; @@ -2152,6 +2156,19 @@ static int ext4_da_writepages(struct address_space *mapping, ret = PTR_ERR(handle); goto out_writepages; } + if (ext4_should_order_data(inode)) { + /* + * With ordered mode we need to add + * the inode to the journal handle + * when we do block allocation. + */ + ret = ext4_jbd2_file_inode(handle, inode); + if (ret) { + ext4_journal_stop(handle); + goto out_writepages; + } + + } /* * set the max dirty pages could be write at a time * to fit into the reserved transaction credits @@ -2735,7 +2752,10 @@ static const struct address_space_operations ext4_da_aops = { void ext4_set_aops(struct inode *inode) { - if (ext4_should_order_data(inode)) + if (ext4_should_order_data(inode) && + test_opt(inode->i_sb, DELALLOC)) + inode->i_mapping->a_ops = &ext4_da_aops; + else if (ext4_should_order_data(inode)) inode->i_mapping->a_ops = &ext4_ordered_aops; else if (ext4_should_writeback_data(inode) && test_opt(inode->i_sb, DELALLOC)) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 483183d15ed5..f8b3be873226 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -184,6 +186,27 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) return ret; } +/* + * write the filemap data using writepage() address_space_operations. + * We don't do block allocation here even for delalloc. We don't + * use writepages() because with dealyed allocation we may be doing + * block allocation in writepages(). + */ +static int journal_submit_inode_data_buffers(struct address_space *mapping) +{ + int ret; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = mapping->nrpages * 2, + .range_start = 0, + .range_end = i_size_read(mapping->host), + .for_writepages = 1, + }; + + ret = generic_writepages(mapping, &wbc); + return ret; +} + /* * Submit all the data buffers of inode associated with the transaction to * disk. @@ -192,7 +215,7 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently * operate on from being released while we write out pages. */ -static int journal_submit_inode_data_buffers(journal_t *journal, +static int journal_submit_data_buffers(journal_t *journal, transaction_t *commit_transaction) { struct jbd2_inode *jinode; @@ -204,8 +227,13 @@ static int journal_submit_inode_data_buffers(journal_t *journal, mapping = jinode->i_vfs_inode->i_mapping; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = filemap_fdatawrite_range(mapping, 0, - i_size_read(jinode->i_vfs_inode)); + /* + * submit the inode data buffers. We use writepage + * instead of writepages. Because writepages can do + * block allocation with delalloc. We need to write + * only allocated blocks here. + */ + err = journal_submit_inode_data_buffers(mapping); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -228,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, struct jbd2_inode *jinode, *next_i; int err, ret = 0; - /* For locking, see the comment in journal_submit_inode_data_buffers() */ + /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { jinode->i_flags |= JI_COMMIT_RUNNING; @@ -431,7 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. */ - err = journal_submit_inode_data_buffers(journal, commit_transaction); + err = journal_submit_data_buffers(journal, commit_transaction); if (err) jbd2_journal_abort(journal, err);