ext4: add discard/zeroout flags to journal flush

Add a flags argument to jbd2_journal_flush to enable discarding or
zero-filling the journal blocks while flushing the journal.

Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
Link: https://lore.kernel.org/r/20210518151327.130198-1-leah.rumancik@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
Leah Rumancik 2021-05-18 15:13:25 +00:00 committed by Theodore Ts'o
parent 8f6840c4fd
commit 01d5d96542
7 changed files with 134 additions and 17 deletions

View File

@ -3223,7 +3223,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
ext4_clear_inode_state(inode, EXT4_STATE_JDATA); ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
journal = EXT4_JOURNAL(inode); journal = EXT4_JOURNAL(inode);
jbd2_journal_lock_updates(journal); jbd2_journal_lock_updates(journal);
err = jbd2_journal_flush(journal); err = jbd2_journal_flush(journal, 0);
jbd2_journal_unlock_updates(journal); jbd2_journal_unlock_updates(journal);
if (err) if (err)
@ -6005,7 +6005,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
if (val) if (val)
ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
else { else {
err = jbd2_journal_flush(journal); err = jbd2_journal_flush(journal, 0);
if (err < 0) { if (err < 0) {
jbd2_journal_unlock_updates(journal); jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_writepages_rwsem); percpu_up_write(&sbi->s_writepages_rwsem);

View File

@ -706,7 +706,7 @@ static long ext4_ioctl_group_add(struct file *file,
err = ext4_group_add(sb, input); err = ext4_group_add(sb, input);
if (EXT4_SB(sb)->s_journal) { if (EXT4_SB(sb)->s_journal) {
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
} }
if (err == 0) if (err == 0)
@ -884,7 +884,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
if (EXT4_SB(sb)->s_journal) { if (EXT4_SB(sb)->s_journal) {
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
} }
if (err == 0) if (err == 0)
@ -1027,7 +1027,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (EXT4_SB(sb)->s_journal) { if (EXT4_SB(sb)->s_journal) {
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE); ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
} }
if (err == 0) if (err == 0)

View File

@ -5653,7 +5653,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
return 0; return 0;
} }
jbd2_journal_lock_updates(journal); jbd2_journal_lock_updates(journal);
err = jbd2_journal_flush(journal); err = jbd2_journal_flush(journal, 0);
if (err < 0) if (err < 0)
goto out; goto out;
@ -5795,7 +5795,7 @@ static int ext4_freeze(struct super_block *sb)
* Don't clear the needs_recovery flag if we failed to * Don't clear the needs_recovery flag if we failed to
* flush the journal. * flush the journal.
*/ */
error = jbd2_journal_flush(journal); error = jbd2_journal_flush(journal, 0);
if (error < 0) if (error < 0)
goto out; goto out;
@ -6389,7 +6389,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* otherwise be livelocked... * otherwise be livelocked...
*/ */
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err) if (err)
return err; return err;

View File

@ -1686,6 +1686,110 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/**
 * __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
 * @journal: The journal to erase.
 * @flags: A discard/zeroout request is sent for each physically contiguous
 *	region of the journal. Either JBD2_JOURNAL_FLUSH_DISCARD or
 *	JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation
 *	to perform.
 *
 * Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes
 * will be explicitly written if no hardware offload is available, see
 * blkdev_issue_zeroout for more details.
 *
 * Return: -EINVAL if @flags is zero, contains bits outside
 * JBD2_JOURNAL_FLUSH_VALID, or requests both operations at once; -ENXIO if
 * the journal device has no request queue; -EOPNOTSUPP if a discard is
 * requested but the queue does not support it; the error from
 * jbd2_journal_bmap(), blkdev_issue_discard() or blkdev_issue_zeroout() if
 * one of them fails; otherwise the result of blkdev_issue_flush().
 */
static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
{
	int err = 0;
	unsigned long block, log_offset; /* logical */
	unsigned long long phys_block, block_start, block_stop; /* physical */
	loff_t byte_start, byte_stop, byte_count;
	struct request_queue *q = bdev_get_queue(journal->j_dev);

	/* flags must be set to either discard or zeroout */
	if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
			((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
			(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
		return -EINVAL;

	if (!q)
		return -ENXIO;

	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
		return -EOPNOTSUPP;

	/*
	 * lookup block mapping and issue discard/zeroout for each
	 * contiguous region
	 */
	log_offset = be32_to_cpu(journal->j_superblock->s_first);
	/* ~0ULL marks "no region currently open" */
	block_start = ~0ULL;
	for (block = log_offset; block < journal->j_total_len; block++) {
		err = jbd2_journal_bmap(journal, block, &phys_block);
		if (err) {
			pr_err("JBD2: bad block at offset %lu\n", block);
			return err;
		}

		if (block_start == ~0ULL) {
			/*
			 * open a new region; block_stop starts one before
			 * it so the contiguity test below accepts this block
			 */
			block_start = phys_block;
			block_stop = block_start - 1;
		}

		/*
		 * last block not contiguous with current block,
		 * process last contiguous region and return to this block on
		 * next loop
		 */
		if (phys_block != block_stop + 1) {
			block--;
		} else {
			block_stop++;
			/*
			 * if this isn't the last block of journal,
			 * no need to process now because next block may also
			 * be part of this contiguous region
			 */
			if (block != journal->j_total_len - 1)
				continue;
		}

		/*
		 * end of contiguous region or this is last block of journal,
		 * take care of the region
		 */
		byte_start = block_start * journal->j_blocksize;
		byte_count = (block_stop - block_start + 1) *
				journal->j_blocksize;
		/*
		 * truncate_inode_pages_range() takes an inclusive end offset,
		 * so byte_stop must be the last byte of the last block in the
		 * region, not the first byte of that block; otherwise the
		 * cached contents of the final block survive the erase.
		 */
		byte_stop = byte_start + byte_count - 1;

		truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
				byte_start, byte_stop);

		if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
			err = blkdev_issue_discard(journal->j_dev,
					byte_start >> SECTOR_SHIFT,
					byte_count >> SECTOR_SHIFT,
					GFP_NOFS, 0);
		} else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
			err = blkdev_issue_zeroout(journal->j_dev,
					byte_start >> SECTOR_SHIFT,
					byte_count >> SECTOR_SHIFT,
					GFP_NOFS, 0);
		}

		if (unlikely(err != 0)) {
			pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu\n",
					err, block_start, block_stop);
			return err;
		}

		/* reset start and stop after processing a region */
		block_start = ~0ULL;
	}

	return blkdev_issue_flush(journal->j_dev);
}
/** /**
* jbd2_journal_update_sb_errno() - Update error in the journal. * jbd2_journal_update_sb_errno() - Update error in the journal.
@ -2246,13 +2350,18 @@ EXPORT_SYMBOL(jbd2_journal_clear_features);
/** /**
* jbd2_journal_flush() - Flush journal * jbd2_journal_flush() - Flush journal
* @journal: Journal to act on. * @journal: Journal to act on.
* @flags: optional operation on the journal blocks after the flush (see below)
* *
* Flush all data for a given journal to disk and empty the journal. * Flush all data for a given journal to disk and empty the journal.
* Filesystems can use this when remounting readonly to ensure that * Filesystems can use this when remounting readonly to ensure that
* recovery does not need to happen on remount. * recovery does not need to happen on remount. Optionally, a discard or zeroout
* can be issued on the journal blocks after flushing.
*
* flags:
* JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks
* JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks
*/ */
int jbd2_journal_flush(journal_t *journal, unsigned int flags)
int jbd2_journal_flush(journal_t *journal)
{ {
int err = 0; int err = 0;
transaction_t *transaction = NULL; transaction_t *transaction = NULL;
@ -2306,6 +2415,10 @@ int jbd2_journal_flush(journal_t *journal)
* commits of data to the journal will restore the current * commits of data to the journal will restore the current
* s_start value. */ * s_start value. */
jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
if (flags)
err = __jbd2_journal_erase(journal, flags);
mutex_unlock(&journal->j_checkpoint_mutex); mutex_unlock(&journal->j_checkpoint_mutex);
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
J_ASSERT(!journal->j_running_transaction); J_ASSERT(!journal->j_running_transaction);

View File

@ -6018,7 +6018,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
* Then truncate log will be replayed resulting in cluster double free. * Then truncate log will be replayed resulting in cluster double free.
*/ */
jbd2_journal_lock_updates(journal->j_journal); jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal); status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal); jbd2_journal_unlock_updates(journal->j_journal);
if (status < 0) { if (status < 0) {
mlog_errno(status); mlog_errno(status);

View File

@ -308,7 +308,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
} }
jbd2_journal_lock_updates(journal->j_journal); jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal); status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal); jbd2_journal_unlock_updates(journal->j_journal);
if (status < 0) { if (status < 0) {
up_write(&journal->j_trans_barrier); up_write(&journal->j_trans_barrier);
@ -1000,7 +1000,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (ocfs2_mount_local(osb)) { if (ocfs2_mount_local(osb)) {
jbd2_journal_lock_updates(journal->j_journal); jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal); status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal); jbd2_journal_unlock_updates(journal->j_journal);
if (status < 0) if (status < 0)
mlog_errno(status); mlog_errno(status);
@ -1070,7 +1070,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
if (replayed) { if (replayed) {
jbd2_journal_lock_updates(journal->j_journal); jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal); status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal); jbd2_journal_unlock_updates(journal->j_journal);
if (status < 0) if (status < 0)
mlog_errno(status); mlog_errno(status);
@ -1666,7 +1666,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
/* wipe the journal */ /* wipe the journal */
jbd2_journal_lock_updates(journal); jbd2_journal_lock_updates(journal);
status = jbd2_journal_flush(journal); status = jbd2_journal_flush(journal, 0);
jbd2_journal_unlock_updates(journal); jbd2_journal_unlock_updates(journal);
if (status < 0) if (status < 0)
mlog_errno(status); mlog_errno(status);

View File

@ -1370,6 +1370,10 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
* mode */ * mode */
#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */ #define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */ #define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
/*
 * Flags for the @flags argument of jbd2_journal_flush(). They select an
 * optional operation on the journal blocks after the flush. The two
 * operations are mutually exclusive: __jbd2_journal_erase() returns -EINVAL
 * if both are set. Passing 0 performs a plain flush.
 */
/* issue discards for the journal blocks */
#define JBD2_JOURNAL_FLUSH_DISCARD 0x0001
/* issue zeroouts for the journal blocks */
#define JBD2_JOURNAL_FLUSH_ZEROOUT 0x0002
/* mask of all defined flush flags; any bit outside it is rejected */
#define JBD2_JOURNAL_FLUSH_VALID (JBD2_JOURNAL_FLUSH_DISCARD | \
JBD2_JOURNAL_FLUSH_ZEROOUT)
/* /*
* Function declarations for the journaling transaction and buffer * Function declarations for the journaling transaction and buffer
@ -1500,7 +1504,7 @@ extern int jbd2_journal_invalidatepage(journal_t *,
struct page *, unsigned int, unsigned int); struct page *, unsigned int, unsigned int);
extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page); extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page);
extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_stop(handle_t *);
extern int jbd2_journal_flush (journal_t *); extern int jbd2_journal_flush(journal_t *journal, unsigned int flags);
extern void jbd2_journal_lock_updates (journal_t *); extern void jbd2_journal_lock_updates (journal_t *);
extern void jbd2_journal_unlock_updates (journal_t *); extern void jbd2_journal_unlock_updates (journal_t *);