ext4: add discard/zeroout flags to journal flush
Add a flags argument to jbd2_journal_flush to enable discarding or zero-filling the journal blocks while flushing the journal. Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com> Link: https://lore.kernel.org/r/20210518151327.130198-1-leah.rumancik@gmail.com Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
8f6840c4fd
commit
01d5d96542
|
@ -3223,7 +3223,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
|
||||||
ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
|
ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
|
||||||
journal = EXT4_JOURNAL(inode);
|
journal = EXT4_JOURNAL(inode);
|
||||||
jbd2_journal_lock_updates(journal);
|
jbd2_journal_lock_updates(journal);
|
||||||
err = jbd2_journal_flush(journal);
|
err = jbd2_journal_flush(journal, 0);
|
||||||
jbd2_journal_unlock_updates(journal);
|
jbd2_journal_unlock_updates(journal);
|
||||||
|
|
||||||
if (err)
|
if (err)
|
||||||
|
@ -6005,7 +6005,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
||||||
if (val)
|
if (val)
|
||||||
ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
|
ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
|
||||||
else {
|
else {
|
||||||
err = jbd2_journal_flush(journal);
|
err = jbd2_journal_flush(journal, 0);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
jbd2_journal_unlock_updates(journal);
|
jbd2_journal_unlock_updates(journal);
|
||||||
percpu_up_write(&sbi->s_writepages_rwsem);
|
percpu_up_write(&sbi->s_writepages_rwsem);
|
||||||
|
|
|
@ -706,7 +706,7 @@ static long ext4_ioctl_group_add(struct file *file,
|
||||||
err = ext4_group_add(sb, input);
|
err = ext4_group_add(sb, input);
|
||||||
if (EXT4_SB(sb)->s_journal) {
|
if (EXT4_SB(sb)->s_journal) {
|
||||||
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
||||||
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
|
||||||
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
||||||
}
|
}
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
|
@ -884,7 +884,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||||
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
|
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
|
||||||
if (EXT4_SB(sb)->s_journal) {
|
if (EXT4_SB(sb)->s_journal) {
|
||||||
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
||||||
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
|
||||||
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
||||||
}
|
}
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
|
@ -1027,7 +1027,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||||
if (EXT4_SB(sb)->s_journal) {
|
if (EXT4_SB(sb)->s_journal) {
|
||||||
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
|
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
|
||||||
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
||||||
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
|
||||||
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
||||||
}
|
}
|
||||||
if (err == 0)
|
if (err == 0)
|
||||||
|
|
|
@ -5653,7 +5653,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
jbd2_journal_lock_updates(journal);
|
jbd2_journal_lock_updates(journal);
|
||||||
err = jbd2_journal_flush(journal);
|
err = jbd2_journal_flush(journal, 0);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
@ -5795,7 +5795,7 @@ static int ext4_freeze(struct super_block *sb)
|
||||||
* Don't clear the needs_recovery flag if we failed to
|
* Don't clear the needs_recovery flag if we failed to
|
||||||
* flush the journal.
|
* flush the journal.
|
||||||
*/
|
*/
|
||||||
error = jbd2_journal_flush(journal);
|
error = jbd2_journal_flush(journal, 0);
|
||||||
if (error < 0)
|
if (error < 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
@ -6389,7 +6389,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
|
||||||
* otherwise be livelocked...
|
* otherwise be livelocked...
|
||||||
*/
|
*/
|
||||||
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
||||||
err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
|
||||||
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
|
@ -1686,6 +1686,110 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
|
||||||
write_unlock(&journal->j_state_lock);
|
write_unlock(&journal->j_state_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
|
||||||
|
* @journal: The journal to erase.
|
||||||
|
* @flags: A discard/zeroout request is sent for each physically contigous
|
||||||
|
* region of the journal. Either JBD2_JOURNAL_FLUSH_DISCARD or
|
||||||
|
* JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation
|
||||||
|
* to perform.
|
||||||
|
*
|
||||||
|
* Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes
|
||||||
|
* will be explicitly written if no hardware offload is available, see
|
||||||
|
* blkdev_issue_zeroout for more details.
|
||||||
|
*/
|
||||||
|
static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
|
||||||
|
{
|
||||||
|
int err = 0;
|
||||||
|
unsigned long block, log_offset; /* logical */
|
||||||
|
unsigned long long phys_block, block_start, block_stop; /* physical */
|
||||||
|
loff_t byte_start, byte_stop, byte_count;
|
||||||
|
struct request_queue *q = bdev_get_queue(journal->j_dev);
|
||||||
|
|
||||||
|
/* flags must be set to either discard or zeroout */
|
||||||
|
if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
|
||||||
|
((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
|
||||||
|
(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (!q)
|
||||||
|
return -ENXIO;
|
||||||
|
|
||||||
|
if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* lookup block mapping and issue discard/zeroout for each
|
||||||
|
* contiguous region
|
||||||
|
*/
|
||||||
|
log_offset = be32_to_cpu(journal->j_superblock->s_first);
|
||||||
|
block_start = ~0ULL;
|
||||||
|
for (block = log_offset; block < journal->j_total_len; block++) {
|
||||||
|
err = jbd2_journal_bmap(journal, block, &phys_block);
|
||||||
|
if (err) {
|
||||||
|
pr_err("JBD2: bad block at offset %lu", block);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block_start == ~0ULL) {
|
||||||
|
block_start = phys_block;
|
||||||
|
block_stop = block_start - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* last block not contiguous with current block,
|
||||||
|
* process last contiguous region and return to this block on
|
||||||
|
* next loop
|
||||||
|
*/
|
||||||
|
if (phys_block != block_stop + 1) {
|
||||||
|
block--;
|
||||||
|
} else {
|
||||||
|
block_stop++;
|
||||||
|
/*
|
||||||
|
* if this isn't the last block of journal,
|
||||||
|
* no need to process now because next block may also
|
||||||
|
* be part of this contiguous region
|
||||||
|
*/
|
||||||
|
if (block != journal->j_total_len - 1)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* end of contiguous region or this is last block of journal,
|
||||||
|
* take care of the region
|
||||||
|
*/
|
||||||
|
byte_start = block_start * journal->j_blocksize;
|
||||||
|
byte_stop = block_stop * journal->j_blocksize;
|
||||||
|
byte_count = (block_stop - block_start + 1) *
|
||||||
|
journal->j_blocksize;
|
||||||
|
|
||||||
|
truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
|
||||||
|
byte_start, byte_stop);
|
||||||
|
|
||||||
|
if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
|
||||||
|
err = blkdev_issue_discard(journal->j_dev,
|
||||||
|
byte_start >> SECTOR_SHIFT,
|
||||||
|
byte_count >> SECTOR_SHIFT,
|
||||||
|
GFP_NOFS, 0);
|
||||||
|
} else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
|
||||||
|
err = blkdev_issue_zeroout(journal->j_dev,
|
||||||
|
byte_start >> SECTOR_SHIFT,
|
||||||
|
byte_count >> SECTOR_SHIFT,
|
||||||
|
GFP_NOFS, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(err != 0)) {
|
||||||
|
pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
|
||||||
|
err, block_start, block_stop);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reset start and stop after processing a region */
|
||||||
|
block_start = ~0ULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return blkdev_issue_flush(journal->j_dev);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* jbd2_journal_update_sb_errno() - Update error in the journal.
|
* jbd2_journal_update_sb_errno() - Update error in the journal.
|
||||||
|
@ -2246,13 +2350,18 @@ EXPORT_SYMBOL(jbd2_journal_clear_features);
|
||||||
/**
|
/**
|
||||||
* jbd2_journal_flush() - Flush journal
|
* jbd2_journal_flush() - Flush journal
|
||||||
* @journal: Journal to act on.
|
* @journal: Journal to act on.
|
||||||
|
* @flags: optional operation on the journal blocks after the flush (see below)
|
||||||
*
|
*
|
||||||
* Flush all data for a given journal to disk and empty the journal.
|
* Flush all data for a given journal to disk and empty the journal.
|
||||||
* Filesystems can use this when remounting readonly to ensure that
|
* Filesystems can use this when remounting readonly to ensure that
|
||||||
* recovery does not need to happen on remount.
|
* recovery does not need to happen on remount. Optionally, a discard or zeroout
|
||||||
|
* can be issued on the journal blocks after flushing.
|
||||||
|
*
|
||||||
|
* flags:
|
||||||
|
* JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks
|
||||||
|
* JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks
|
||||||
*/
|
*/
|
||||||
|
int jbd2_journal_flush(journal_t *journal, unsigned int flags)
|
||||||
int jbd2_journal_flush(journal_t *journal)
|
|
||||||
{
|
{
|
||||||
int err = 0;
|
int err = 0;
|
||||||
transaction_t *transaction = NULL;
|
transaction_t *transaction = NULL;
|
||||||
|
@ -2306,6 +2415,10 @@ int jbd2_journal_flush(journal_t *journal)
|
||||||
* commits of data to the journal will restore the current
|
* commits of data to the journal will restore the current
|
||||||
* s_start value. */
|
* s_start value. */
|
||||||
jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
|
jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
|
||||||
|
|
||||||
|
if (flags)
|
||||||
|
err = __jbd2_journal_erase(journal, flags);
|
||||||
|
|
||||||
mutex_unlock(&journal->j_checkpoint_mutex);
|
mutex_unlock(&journal->j_checkpoint_mutex);
|
||||||
write_lock(&journal->j_state_lock);
|
write_lock(&journal->j_state_lock);
|
||||||
J_ASSERT(!journal->j_running_transaction);
|
J_ASSERT(!journal->j_running_transaction);
|
||||||
|
|
|
@ -6018,7 +6018,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
|
||||||
* Then truncate log will be replayed resulting in cluster double free.
|
* Then truncate log will be replayed resulting in cluster double free.
|
||||||
*/
|
*/
|
||||||
jbd2_journal_lock_updates(journal->j_journal);
|
jbd2_journal_lock_updates(journal->j_journal);
|
||||||
status = jbd2_journal_flush(journal->j_journal);
|
status = jbd2_journal_flush(journal->j_journal, 0);
|
||||||
jbd2_journal_unlock_updates(journal->j_journal);
|
jbd2_journal_unlock_updates(journal->j_journal);
|
||||||
if (status < 0) {
|
if (status < 0) {
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
|
|
|
@ -308,7 +308,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
|
||||||
}
|
}
|
||||||
|
|
||||||
jbd2_journal_lock_updates(journal->j_journal);
|
jbd2_journal_lock_updates(journal->j_journal);
|
||||||
status = jbd2_journal_flush(journal->j_journal);
|
status = jbd2_journal_flush(journal->j_journal, 0);
|
||||||
jbd2_journal_unlock_updates(journal->j_journal);
|
jbd2_journal_unlock_updates(journal->j_journal);
|
||||||
if (status < 0) {
|
if (status < 0) {
|
||||||
up_write(&journal->j_trans_barrier);
|
up_write(&journal->j_trans_barrier);
|
||||||
|
@ -1000,7 +1000,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
|
||||||
|
|
||||||
if (ocfs2_mount_local(osb)) {
|
if (ocfs2_mount_local(osb)) {
|
||||||
jbd2_journal_lock_updates(journal->j_journal);
|
jbd2_journal_lock_updates(journal->j_journal);
|
||||||
status = jbd2_journal_flush(journal->j_journal);
|
status = jbd2_journal_flush(journal->j_journal, 0);
|
||||||
jbd2_journal_unlock_updates(journal->j_journal);
|
jbd2_journal_unlock_updates(journal->j_journal);
|
||||||
if (status < 0)
|
if (status < 0)
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
|
@ -1070,7 +1070,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
|
||||||
|
|
||||||
if (replayed) {
|
if (replayed) {
|
||||||
jbd2_journal_lock_updates(journal->j_journal);
|
jbd2_journal_lock_updates(journal->j_journal);
|
||||||
status = jbd2_journal_flush(journal->j_journal);
|
status = jbd2_journal_flush(journal->j_journal, 0);
|
||||||
jbd2_journal_unlock_updates(journal->j_journal);
|
jbd2_journal_unlock_updates(journal->j_journal);
|
||||||
if (status < 0)
|
if (status < 0)
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
|
@ -1666,7 +1666,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
|
||||||
|
|
||||||
/* wipe the journal */
|
/* wipe the journal */
|
||||||
jbd2_journal_lock_updates(journal);
|
jbd2_journal_lock_updates(journal);
|
||||||
status = jbd2_journal_flush(journal);
|
status = jbd2_journal_flush(journal, 0);
|
||||||
jbd2_journal_unlock_updates(journal);
|
jbd2_journal_unlock_updates(journal);
|
||||||
if (status < 0)
|
if (status < 0)
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
|
|
|
@ -1370,6 +1370,10 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
|
||||||
* mode */
|
* mode */
|
||||||
#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
|
#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
|
||||||
#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
|
#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
|
||||||
|
#define JBD2_JOURNAL_FLUSH_DISCARD 0x0001 /* issue discards for the journal blocks after flushing */
|
||||||
|
#define JBD2_JOURNAL_FLUSH_ZEROOUT 0x0002 /* issue zeroouts for the journal blocks after flushing */
|
||||||
|
#define JBD2_JOURNAL_FLUSH_VALID (JBD2_JOURNAL_FLUSH_DISCARD | \
|
||||||
|
JBD2_JOURNAL_FLUSH_ZEROOUT) /* mask of every flag jbd2_journal_flush() accepts */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Function declarations for the journaling transaction and buffer
|
* Function declarations for the journaling transaction and buffer
|
||||||
|
@ -1500,7 +1504,7 @@ extern int jbd2_journal_invalidatepage(journal_t *,
|
||||||
struct page *, unsigned int, unsigned int);
|
struct page *, unsigned int, unsigned int);
|
||||||
extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page);
|
extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page);
|
||||||
extern int jbd2_journal_stop(handle_t *);
|
extern int jbd2_journal_stop(handle_t *);
|
||||||
extern int jbd2_journal_flush (journal_t *);
|
extern int jbd2_journal_flush(journal_t *journal, unsigned int flags);
|
||||||
extern void jbd2_journal_lock_updates (journal_t *);
|
extern void jbd2_journal_lock_updates (journal_t *);
|
||||||
extern void jbd2_journal_unlock_updates (journal_t *);
|
extern void jbd2_journal_unlock_updates (journal_t *);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue