From 15d3042a937c13f5d9244241c7a9c8416ff6e82a Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Mon, 15 May 2017 10:45:08 -0700 Subject: [PATCH 01/59] f2fs: sanity check checkpoint segno and blkoff Make sure segno and blkoff read from raw image are valid. Cc: stable@vger.kernel.org Signed-off-by: Jin Qian [Jaegeuk Kim: adjust minor coding style] Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 83355ec4a92c..397b1e816b36 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1521,6 +1521,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned int ovp_segments, reserved_segments; + unsigned int main_segs, blocks_per_seg; + int i; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -1542,6 +1544,20 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + main_segs = le32_to_cpu(raw_super->segment_count_main); + blocks_per_seg = sbi->blocks_per_seg; + + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || + le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) + return 1; + } + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || + le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; From 93607124c5450148e592c3d18ac533b4e5f25b8b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 May 2017 13:20:16 -0700 Subject: [PATCH 02/59] f2fs: load inode's flag from disk This patch fixes missing inode flag loaded from disk, reported by Tom. 
[tom@localhost ~]$ sudo mount /dev/loop0 /mnt/ [tom@localhost ~]$ sudo chown tom:tom /mnt/ [tom@localhost ~]$ touch /mnt/testfile [tom@localhost ~]$ sudo chattr +i /mnt/testfile [tom@localhost ~]$ echo test > /mnt/testfile bash: /mnt/testfile: Operation not permitted [tom@localhost ~]$ rm /mnt/testfile rm: cannot remove '/mnt/testfile': Operation not permitted [tom@localhost ~]$ sudo umount /mnt/ [tom@localhost ~]$ sudo mount /dev/loop0 /mnt/ [tom@localhost ~]$ lsattr /mnt/testfile ----i-------------- /mnt/testfile [tom@localhost ~]$ echo test > /mnt/testfile [tom@localhost ~]$ rm /mnt/testfile [tom@localhost ~]$ sudo umount /mnt/ Cc: stable@vger.kernel.org Reported-by: Tom Yan Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 + fs/f2fs/inode.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index abb0403d3414..9b3e7635222c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1493,6 +1493,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, false); inode_unlock(inode); out: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 518f49643092..e53c784ab11e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -44,7 +44,6 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - f2fs_mark_inode_dirty_sync(inode, false); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -226,6 +225,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = -EIO; goto bad_inode; } + f2fs_set_inode_flags(inode); unlock_new_inode(inode); trace_f2fs_iget(inode); return inode; From 1c6d8ee4b8aaadc3645497658007ca007312351d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 3 May 2017 23:59:12 +0800 Subject: [PATCH 03/59] f2fs: support statx Last kernel has already support new syscall 
statx() in commit a528d35e8bfc ("statx: Add a system call to make enhanced file info available"), with this interface we can show more file info including file creation and some attribute flags to user. This patch tries to support this functionality. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9b3e7635222c..8ccbfe53c03c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -633,9 +633,30 @@ int f2fs_truncate(struct inode *inode) } int f2fs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) + u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int flags; + + flags = fi->i_flags & FS_FL_USER_VISIBLE; + if (flags & FS_APPEND_FL) + stat->attributes |= STATX_ATTR_APPEND; + if (flags & FS_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (f2fs_encrypted_inode(inode)) + stat->attributes |= STATX_ATTR_ENCRYPTED; + if (flags & FS_IMMUTABLE_FL) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (flags & FS_NODUMP_FL) + stat->attributes |= STATX_ATTR_NODUMP; + + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); + generic_fillattr(inode, stat); stat->blocks <<= 3; return 0; From e5dbd9563e5528f98728ba0bc8361f804ace5aae Mon Sep 17 00:00:00 2001 From: Weichao Guo Date: Thu, 11 May 2017 04:28:00 +0800 Subject: [PATCH 04/59] f2fs: make sure f2fs_gc returns consistent errno By default, f2fs_gc returns -EINVAL in general error cases, e.g., no victim was selected. However, the default errno may be overwritten in two cases: gc_more and BG_GC -> FG_GC. We should return consistent errno in such cases. 
Signed-off-by: Weichao Guo Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 026522107ca3..965fbf5d0a2e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -955,7 +955,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, { int gc_type = sync ? FG_GC : BG_GC; int sec_freed = 0; - int ret = -EINVAL; + int ret; struct cp_control cpc; unsigned int init_segno = segno; struct gc_inode_list gc_list = { @@ -965,8 +965,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, cpc.reason = __get_cp_reason(sbi); gc_more: - if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) + if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) { + ret = -EINVAL; goto stop; + } if (unlikely(f2fs_cp_error(sbi))) { ret = -EIO; goto stop; @@ -987,6 +989,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, gc_type = FG_GC; } + ret = -EINVAL; /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */ if (gc_type == BG_GC && !background) goto stop; From 1919ffc0d7a300d4d8002e92ab3c6dea1974defc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 11:23:36 -0700 Subject: [PATCH 05/59] f2fs: use f2fs_submit_page_bio for ra_meta_pages This patch avoids using f2fs_submit_merged_bio for read, which was the only read case. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ea9c317b5916..8d92f8249000 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -207,12 +207,10 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, } fio.page = page; - fio.old_blkaddr = fio.new_blkaddr; - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_bio(&fio); f2fs_put_page(page, 0); } out: - f2fs_submit_merged_bio(sbi, META, READ); blk_finish_plug(&plug); return blkno - start; } From b9109b0e49b93b0ae663330acb36561b8f4f6905 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 11:28:38 -0700 Subject: [PATCH 06/59] f2fs: remove unnecessary read cases in merged IO flow Merged IO flow doesn't need to care about read IOs. f2fs_submit_merged_bio -> f2fs_submit_merged_write f2fs_submit_merged_bios -> f2fs_submit_merged_writes f2fs_submit_merged_bio_cond -> f2fs_submit_merged_write_cond Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 14 +++++----- fs/f2fs/data.c | 55 ++++++++++++++++--------------------- fs/f2fs/f2fs.h | 12 ++++---- fs/f2fs/gc.c | 6 ++-- fs/f2fs/node.c | 11 ++++---- fs/f2fs/segment.c | 11 ++++---- fs/f2fs/super.c | 5 +--- include/trace/events/f2fs.h | 2 +- 8 files changed, 51 insertions(+), 65 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8d92f8249000..13828f63a871 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -31,7 +31,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) set_ckpt_flags(sbi, CP_ERROR_FLAG); sbi->sb->s_flags |= MS_RDONLY; if (!end_io) - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); } /* @@ -247,13 +247,13 @@ static int f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_bio_cond(sbi, page->mapping->host, - 0, page->index, META, WRITE); + 
f2fs_submit_merged_write_cond(sbi, page->mapping->host, + 0, page->index, META); unlock_page(page); if (unlikely(f2fs_cp_error(sbi))) - f2fs_submit_merged_bio(sbi, META, WRITE); + f2fs_submit_merged_write(sbi, META); return 0; @@ -356,7 +356,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, } stop: if (nwritten) - f2fs_submit_merged_bio(sbi, type, WRITE); + f2fs_submit_merged_write(sbi, type); blk_finish_plug(&plug); @@ -904,7 +904,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) * We should submit bio, since it exists several * wribacking dentry pages in the freeing inode. */ - f2fs_submit_merged_bio(sbi, DATA, WRITE); + f2fs_submit_merged_write(sbi, DATA); cond_resched(); } goto retry; @@ -1293,7 +1293,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); /* this is the case of multiple fstrims without any changes */ if (cpc->reason & CP_DISCARD) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7c0f6bdf817d..06bb2042385e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -291,14 +291,12 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, return ret; } -static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, +static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, int rw) + enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io; - - io = is_read_io(rw) ? 
&sbi->read_io : &sbi->write_io[btype]; + struct f2fs_bio_info *io = &sbi->write_io[btype]; down_write(&io->io_rwsem); @@ -318,25 +316,24 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, up_write(&io->io_rwsem); } -void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, - int rw) +void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { - __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw); + __f2fs_submit_merged_write(sbi, NULL, 0, 0, type); } -void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, +void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, int rw) + enum page_type type) { if (has_merged_page(sbi, inode, ino, idx, type)) - __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw); + __f2fs_submit_merged_write(sbi, inode, ino, idx, type); } -void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) +void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) { - f2fs_submit_merged_bio(sbi, DATA, WRITE); - f2fs_submit_merged_bio(sbi, NODE, WRITE); - f2fs_submit_merged_bio(sbi, META, WRITE); + f2fs_submit_merged_write(sbi, DATA); + f2fs_submit_merged_write(sbi, NODE); + f2fs_submit_merged_write(sbi, META); } /* @@ -368,16 +365,15 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return 0; } -int f2fs_submit_page_mbio(struct f2fs_io_info *fio) +int f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); - struct f2fs_bio_info *io; - bool is_read = is_read_io(fio->op); + struct f2fs_bio_info *io = &sbi->write_io[btype]; struct page *bio_page; int err = 0; - io = is_read ? 
&sbi->read_io : &sbi->write_io[btype]; + f2fs_bug_on(sbi, is_read_io(fio->op)); if (fio->old_blkaddr != NEW_ADDR) verify_block_addr(sbi, fio->old_blkaddr); @@ -388,8 +384,7 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio) /* set submitted = 1 as a return value */ fio->submitted = 1; - if (!is_read) - inc_page_count(sbi, WB_DATA_TYPE(bio_page)); + inc_page_count(sbi, WB_DATA_TYPE(bio_page)); down_write(&io->io_rwsem); @@ -402,12 +397,11 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio) if ((fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { err = -EAGAIN; - if (!is_read) - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + dec_page_count(sbi, WB_DATA_TYPE(bio_page)); goto out_fail; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, - BIO_MAX_PAGES, is_read); + BIO_MAX_PAGES, false); io->fio = *fio; } @@ -421,7 +415,7 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio) f2fs_trace_ios(fio, 0); out_fail: up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(fio->page, fio); + trace_f2fs_submit_page_write(fio->page, fio); return err; } @@ -1321,7 +1315,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio) /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { - f2fs_flush_merged_bios(fio->sbi); + f2fs_flush_merged_writes(fio->sbi); congestion_wait(BLK_RW_ASYNC, HZ/50); gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; @@ -1513,8 +1507,7 @@ static int __write_data_page(struct page *page, bool *submitted, ClearPageUptodate(page); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index, - DATA, WRITE); + f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); clear_inode_flag(inode, FI_HOT_DATA); remove_dirty_inode(inode); submitted = NULL; @@ -1525,7 +1518,7 @@ static int __write_data_page(struct page *page, bool *submitted, f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { - f2fs_submit_merged_bio(sbi, DATA, 
WRITE); + f2fs_submit_merged_write(sbi, DATA); submitted = NULL; } @@ -1684,8 +1677,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, mapping->writeback_index = done_index; if (last_idx != ULONG_MAX) - f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host, - 0, last_idx, DATA, WRITE); + f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, + 0, last_idx, DATA); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e26999a74522..58697bd588fa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -879,7 +879,6 @@ struct f2fs_sb_info { struct f2fs_sm_info *sm_info; /* segment manager */ /* for bio operations */ - struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ int write_io_size_bits; /* Write IO size bits */ @@ -2325,14 +2324,13 @@ void destroy_checkpoint_caches(void); /* * data.c */ -void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, - int rw); -void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, +void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); +void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, int rw); -void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi); + enum page_type type); +void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); -int f2fs_submit_page_mbio(struct f2fs_io_info *fio); +int f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio); int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 965fbf5d0a2e..67b87155bc48 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -670,7 +670,7 @@ static void move_encrypted_block(struct inode 
*inode, block_t bidx, fio.op = REQ_OP_WRITE; fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_write(&fio); f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); @@ -936,8 +936,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, } if (gc_type == FG_GC) - f2fs_submit_merged_bio(sbi, - (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); + f2fs_submit_merged_write(sbi, + (type == SUM_TYPE_NODE) ? NODE : DATA); blk_finish_plug(&plug); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 98351a4a4da3..41bb632ac2e0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1373,15 +1373,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0, - page->index, NODE, WRITE); + f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0, + page->index, NODE); submitted = NULL; } unlock_page(page); if (unlikely(f2fs_cp_error(sbi))) { - f2fs_submit_merged_bio(sbi, NODE, WRITE); + f2fs_submit_merged_write(sbi, NODE); submitted = NULL; } if (submitted) @@ -1518,8 +1518,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, } out: if (last_idx != ULONG_MAX) - f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx, - NODE, WRITE); + f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE); return ret ? 
-EIO: 0; } @@ -1625,7 +1624,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) } out: if (nwritten) - f2fs_submit_merged_bio(sbi, NODE, WRITE); + f2fs_submit_merged_write(sbi, NODE); return ret; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index de31030b5041..38bb675976e2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -328,8 +328,7 @@ static int __commit_inmem_pages(struct inode *inode, } if (last_idx != ULONG_MAX) - f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx, - DATA, WRITE); + f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); if (!err) __revoke_inmem_pages(inode, revoke_list, false, false); @@ -2150,7 +2149,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ - err = f2fs_submit_page_mbio(fio); + err = f2fs_submit_page_write(fio); if (err == -EAGAIN) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; @@ -2177,7 +2176,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) fio.op_flags &= ~REQ_META; set_page_writeback(page); - f2fs_submit_page_mbio(&fio); + f2fs_submit_page_write(&fio); } void write_node_page(unsigned int nid, struct f2fs_io_info *fio) @@ -2296,8 +2295,8 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_bio_cond(sbi, page->mapping->host, - 0, page->index, type, WRITE); + f2fs_submit_merged_write_cond(sbi, page->mapping->host, + 0, page->index, type); if (ordered) wait_on_page_writeback(page); else diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 397b1e816b36..90599397425a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -817,7 +817,7 @@ static void f2fs_put_super(struct super_block *sb) mutex_unlock(&sbi->umount_mutex); /* our cp_error case, we can wait for any writeback page */ - f2fs_flush_merged_bios(sbi); + f2fs_flush_merged_writes(sbi); 
iput(sbi->node_inode); iput(sbi->meta_inode); @@ -1966,9 +1966,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) set_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); - init_rwsem(&sbi->read_io.io_rwsem); - sbi->read_io.sbi = sbi; - sbi->read_io.bio = NULL; for (i = 0; i < NR_PAGE_TYPE; i++) { init_rwsem(&sbi->write_io[i].io_rwsem); sbi->write_io[i].sbi = sbi; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 15da88c5c3a4..5805d92893a8 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -790,7 +790,7 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, TP_CONDITION(page->mapping) ); -DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write, TP_PROTO(struct page *page, struct f2fs_io_info *fio), From 81377bd62837c8113b1c49c5dfa6b1af8f9ee5c2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 14:19:54 -0700 Subject: [PATCH 07/59] f2fs: use fio instead of multiple parameters This patch just changes using fio instead of parameters. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 38bb675976e2..c9f3a2faee21 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2039,61 +2039,62 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } -static int __get_segment_type_2(struct page *page, enum page_type p_type) +static int __get_segment_type_2(struct f2fs_io_info *fio) { - if (p_type == DATA) + if (fio->type == DATA) return CURSEG_HOT_DATA; else return CURSEG_HOT_NODE; } -static int __get_segment_type_4(struct page *page, enum page_type p_type) +static int __get_segment_type_4(struct f2fs_io_info *fio) { - if (p_type == DATA) { - struct inode *inode = page->mapping->host; + if (fio->type == DATA) { + struct inode *inode = fio->page->mapping->host; if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && is_cold_node(page)) + if (IS_DNODE(fio->page) && is_cold_node(fio->page)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } } -static int __get_segment_type_6(struct page *page, enum page_type p_type) +static int __get_segment_type_6(struct f2fs_io_info *fio) { - if (p_type == DATA) { - struct inode *inode = page->mapping->host; + if (fio->type == DATA) { + struct inode *inode = fio->page->mapping->host; - if (is_cold_data(page) || file_is_cold(inode)) + if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; if (is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; return CURSEG_WARM_DATA; } else { - if (IS_DNODE(page)) - return is_cold_node(page) ? CURSEG_WARM_NODE : + if (IS_DNODE(fio->page)) + return is_cold_node(fio->page) ? 
CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; } } -static int __get_segment_type(struct page *page, enum page_type p_type) +static int __get_segment_type(struct f2fs_io_info *fio) { - switch (F2FS_P_SB(page)->active_logs) { + switch (fio->sbi->active_logs) { case 2: - return __get_segment_type_2(page, p_type); + return __get_segment_type_2(fio); case 4: - return __get_segment_type_4(page, p_type); + return __get_segment_type_4(fio); } + /* NR_CURSEG_TYPE(6) logs by default */ - f2fs_bug_on(F2FS_P_SB(page), - F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); - return __get_segment_type_6(page, p_type); + f2fs_bug_on(fio->sbi, fio->sbi->active_logs != NR_CURSEG_TYPE); + + return __get_segment_type_6(fio); } void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, @@ -2139,7 +2140,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(fio->page, fio->type); + int type = __get_segment_type(fio); int err; if (fio->type == NODE || fio->type == DATA) From a912b54d3aaa011266dc266e3694f782f27233cf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 11:18:25 -0700 Subject: [PATCH 08/59] f2fs: split bio cache Split DATA/NODE type bio cache according to different temperature, so write IOs with the same temperature can be merged in corresponding bio cache as much as possible, otherwise, different temperature write IOs submitting into one bio cache will always cause split of bio. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 57 +++++++++++++++++++++++++------------ fs/f2fs/f2fs.h | 10 ++++++- fs/f2fs/gc.c | 2 ++ fs/f2fs/segment.c | 24 ++++++++++++---- fs/f2fs/segment.h | 4 +++ fs/f2fs/super.c | 21 ++++++++++++-- include/trace/events/f2fs.h | 14 ++++++++- 7 files changed, 103 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 06bb2042385e..4ca0899621a0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -282,27 +282,32 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io = &sbi->write_io[btype]; - bool ret; + enum temp_type temp; + struct f2fs_bio_info *io; + bool ret = false; - down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, ino, idx); - up_read(&io->io_rwsem); + for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + io = sbi->write_io[btype] + temp; + + down_read(&io->io_rwsem); + ret = __has_merged_page(io, inode, ino, idx); + up_read(&io->io_rwsem); + + /* TODO: use HOT temp only for meta pages now. 
*/ + if (ret || btype == META) + break; + } return ret; } static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type) + enum page_type type, enum temp_type temp) { enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io = &sbi->write_io[btype]; + struct f2fs_bio_info *io = sbi->write_io[btype] + temp; down_write(&io->io_rwsem); - if (!__has_merged_page(io, inode, ino, idx)) - goto out; - /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { io->fio.type = META_FLUSH; @@ -312,21 +317,38 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); -out: up_write(&io->io_rwsem); } +static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type, bool force) +{ + enum temp_type temp; + + if (!force && !has_merged_page(sbi, inode, ino, idx, type)) + return; + + for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + + __f2fs_submit_merged_write(sbi, type, temp); + + /* TODO: use HOT temp only for meta pages now. 
*/ + if (type >= META) + break; + } +} + void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { - __f2fs_submit_merged_write(sbi, NULL, 0, 0, type); + __submit_merged_write_cond(sbi, NULL, 0, 0, type, true); } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, enum page_type type) { - if (has_merged_page(sbi, inode, ino, idx, type)) - __f2fs_submit_merged_write(sbi, inode, ino, idx, type); + __submit_merged_write_cond(sbi, inode, ino, idx, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) @@ -369,7 +391,7 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); - struct f2fs_bio_info *io = &sbi->write_io[btype]; + struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; int err = 0; @@ -405,8 +427,7 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) io->fio = *fio; } - if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < - PAGE_SIZE) { + if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { __submit_merged_bio(io); goto alloc_new; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 58697bd588fa..c6643783adff 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -792,9 +792,17 @@ enum page_type { OPU, }; +enum temp_type { + HOT = 0, /* must be zero for meta bio */ + WARM, + COLD, + NR_TEMP_TYPE, +}; + struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ + enum temp_type temp; /* contains HOT/WARM/COLD */ int op; /* contains REQ_OP_ */ int op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ @@ -879,7 +887,7 @@ struct f2fs_sb_info { struct f2fs_sm_info *sm_info; /* segment manager */ /* for bio operations */ - struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ + struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; 
/* for write bios */ struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ int write_io_size_bits; /* Write IO size bits */ mempool_t *write_io_dummy; /* Dummy pages */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 67b87155bc48..e2b13558a915 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -586,6 +586,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, + .temp = COLD, .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, @@ -712,6 +713,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .type = DATA, + .temp = COLD, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, .old_blkaddr = NULL_ADDR, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c9f3a2faee21..fcada9d03817 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2084,17 +2084,29 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) static int __get_segment_type(struct f2fs_io_info *fio) { + int type = 0; + switch (fio->sbi->active_logs) { case 2: - return __get_segment_type_2(fio); + type = __get_segment_type_2(fio); + break; case 4: - return __get_segment_type_4(fio); + type = __get_segment_type_4(fio); + break; + case 6: + type = __get_segment_type_6(fio); + break; + default: + f2fs_bug_on(fio->sbi, true); } - /* NR_CURSEG_TYPE(6) logs by default */ - f2fs_bug_on(fio->sbi, fio->sbi->active_logs != NR_CURSEG_TYPE); - - return __get_segment_type_6(fio); + if (IS_HOT(type)) + fio->temp = HOT; + else if (IS_WARM(type)) + fio->temp = WARM; + else + fio->temp = COLD; + return type; } void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 10bf05d4cff4..e9ba1f1d9723 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -27,6 +27,10 @@ #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA) #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE) +#define IS_HOT(t) 
((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA) +#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA) +#define IS_COLD(t) ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA) + #define IS_CURSEG(sbi, seg) \ (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 90599397425a..aa451ec9fb80 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -768,6 +768,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); @@ -838,6 +839,8 @@ static void f2fs_put_super(struct super_block *sb) destroy_device_list(sbi); mempool_destroy(sbi->write_io_dummy); destroy_percpu_info(sbi); + for (i = 0; i < NR_PAGE_TYPE; i++) + kfree(sbi->write_io[i]); kfree(sbi); } @@ -1967,9 +1970,19 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) spin_lock_init(&sbi->stat_lock); for (i = 0; i < NR_PAGE_TYPE; i++) { - init_rwsem(&sbi->write_io[i].io_rwsem); - sbi->write_io[i].sbi = sbi; - sbi->write_io[i].bio = NULL; + int n = (i == META) ? 
1: NR_TEMP_TYPE; + int j; + + sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info), + GFP_KERNEL); + if (!sbi->write_io[i]) + goto free_options; + + for (j = HOT; j < n; j++) { + init_rwsem(&sbi->write_io[i][j].io_rwsem); + sbi->write_io[i][j].sbi = sbi; + sbi->write_io[i][j].bio = NULL; + } } init_rwsem(&sbi->cp_rwsem); @@ -2215,6 +2228,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_options: + for (i = 0; i < NR_PAGE_TYPE; i++) + kfree(sbi->write_io[i]); destroy_percpu_info(sbi); kfree(options); free_sb_buf: diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 5805d92893a8..6f77a2755abb 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -19,6 +19,9 @@ TRACE_DEFINE_ENUM(INMEM_INVALIDATE); TRACE_DEFINE_ENUM(INMEM_REVOKE); TRACE_DEFINE_ENUM(IPU); TRACE_DEFINE_ENUM(OPU); +TRACE_DEFINE_ENUM(HOT); +TRACE_DEFINE_ENUM(WARM); +TRACE_DEFINE_ENUM(COLD); TRACE_DEFINE_ENUM(CURSEG_HOT_DATA); TRACE_DEFINE_ENUM(CURSEG_WARM_DATA); TRACE_DEFINE_ENUM(CURSEG_COLD_DATA); @@ -59,6 +62,12 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) +#define show_block_temp(temp) \ + __print_symbolic(temp, \ + { HOT, "HOT" }, \ + { WARM, "WARM" }, \ + { COLD, "COLD" }) + #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_PREFLUSH | REQ_FUA) #define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) @@ -757,6 +766,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __field(block_t, new_blkaddr) __field(int, op) __field(int, op_flags) + __field(int, temp) __field(int, type) ), @@ -768,16 +778,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __entry->new_blkaddr = fio->new_blkaddr; __entry->op = fio->op; __entry->op_flags = fio->op_flags; + __entry->temp = fio->temp; __entry->type = fio->type; ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = 
%s", + "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s", show_dev_ino(__entry), (unsigned long)__entry->index, (unsigned long long)__entry->old_blkaddr, (unsigned long long)__entry->new_blkaddr, show_bio_type(__entry->op, __entry->op_flags), + show_block_temp(__entry->temp), show_block_type(__entry->type)) ); From cc15620bc826b14006956fd321e026ae96aff53a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 12 May 2017 13:51:34 -0700 Subject: [PATCH 09/59] f2fs: avoid f2fs_lock_op for IPU writes Currently, if we do get_dnode_of_data before f2fs_lock_op, there may be a deadlock as follows, where process A would be in an infinite loop, and B will NOT be woken up. Process A(cp): Process B: f2fs_lock_all(sbi) get_dnode_of_data <---- lock dn.node_page flush_nodes f2fs_lock_op So, this patch adds f2fs_trylock_op to avoid f2fs_lock_op done by IPU. Signed-off-by: Hou Pengyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 44 +++++++++++++++++++++++++++++++------------- fs/f2fs/f2fs.h | 13 ++++++++++++- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 2 +- 4 files changed, 45 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4ca0899621a0..779a306858a2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1383,12 +1383,12 @@ int do_write_data_page(struct f2fs_io_info *fio) if (valid_ipu_blkaddr(fio)) { ipu_force = true; - fio->need_lock = false; + fio->need_lock = LOCK_DONE; goto got_it; } } - if (fio->need_lock) + if (fio->need_lock == LOCK_REQ) f2fs_lock_op(fio->sbi); err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); @@ -1403,19 +1403,18 @@ int do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; } got_it: - err = encrypt_one_page(fio); - if (err) - goto out_writepage; - - set_page_writeback(page); - /* * If current allocation needs SSR, * it had better in-place writes for updated data.
*/ if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { + err = encrypt_one_page(fio); + if (err) + goto out_writepage; + + set_page_writeback(page); f2fs_put_dnode(&dn); - if (fio->need_lock) + if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); err = rewrite_data_page(fio); trace_f2fs_do_write_data_page(fio->page, IPU); @@ -1423,6 +1422,20 @@ int do_write_data_page(struct f2fs_io_info *fio) return err; } + if (fio->need_lock == LOCK_RETRY) { + if (!f2fs_trylock_op(fio->sbi)) { + err = -EAGAIN; + goto out_writepage; + } + fio->need_lock = LOCK_REQ; + } + + err = encrypt_one_page(fio); + if (err) + goto out_writepage; + + set_page_writeback(page); + /* LFS mode write path */ write_data_page(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); @@ -1432,7 +1445,7 @@ int do_write_data_page(struct f2fs_io_info *fio) out_writepage: f2fs_put_dnode(&dn); out: - if (fio->need_lock) + if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); return err; } @@ -1458,7 +1471,7 @@ static int __write_data_page(struct page *page, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, - .need_lock = true, + .need_lock = LOCK_RETRY, }; trace_f2fs_writepage(page, DATA); @@ -1494,7 +1507,7 @@ static int __write_data_page(struct page *page, bool *submitted, /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { - fio.need_lock = false; + fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); goto done; } @@ -1513,8 +1526,13 @@ static int __write_data_page(struct page *page, bool *submitted, goto out; } - if (err == -EAGAIN) + if (err == -EAGAIN) { err = do_write_data_page(&fio); + if (err == -EAGAIN) { + fio.need_lock = LOCK_REQ; + err = do_write_data_page(&fio); + } + } if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c6643783adff..f61e095947c8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -799,6 +799,12 @@ enum temp_type 
{ NR_TEMP_TYPE, }; +enum need_lock_type { + LOCK_REQ = 0, + LOCK_DONE, + LOCK_RETRY, +}; + struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ @@ -810,7 +816,7 @@ struct f2fs_io_info { struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ bool submitted; /* indicate IO submission */ - bool need_lock; /* indicate we need to lock cp_rwsem */ + int need_lock; /* indicate we need to lock cp_rwsem */ }; #define is_read_io(rw) ((rw) == READ) @@ -1279,6 +1285,11 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) down_read(&sbi->cp_rwsem); } +static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) +{ + return down_read_trylock(&sbi->cp_rwsem); +} + static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { up_read(&sbi->cp_rwsem); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e2b13558a915..14c71ac76062 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -719,7 +719,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, - .need_lock = true, + .need_lock = LOCK_REQ, }; bool is_dirty = PageDirty(page); int err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fcada9d03817..3bc36769ec9f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -312,7 +312,7 @@ static int __commit_inmem_pages(struct inode *inode, fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; - fio.need_lock = false, + fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); if (err) { unlock_page(page); From bd80a4b9812c0d74ecfc0b1b14ca77732faa2259 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Wed, 17 May 2017 02:48:48 +0000 Subject: [PATCH 10/59] f2fs: declare load_free_nid_bitmap static Signed-off-by: Hou Pengyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c 
b/fs/f2fs/node.c index 41bb632ac2e0..d22db8ce0a69 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2552,7 +2552,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) return 0; } -inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) +static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int i = 0; From b7b7c4cf1c9ef0272a65f1480457cbfdadcda19d Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 17 May 2017 17:22:51 +0800 Subject: [PATCH 11/59] f2fs: add a new function get_ssr_cost This patch adds a new function, get_ssr_cost, to select the SSR segment more accurately. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 14c71ac76062..81392970fb2d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -258,11 +258,20 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, valid_blocks * 2 : valid_blocks; } +static unsigned int get_ssr_cost(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct seg_entry *se = get_seg_entry(sbi, segno); + + return se->ckpt_valid_blocks > se->valid_blocks ? + se->ckpt_valid_blocks : se->valid_blocks; +} + static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, struct victim_sel_policy *p) { if (p->alloc_mode == SSR) - return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + return get_ssr_cost(sbi, segno); /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) From 1d7be2708277edfef95171d52fb65ee26eaa076b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 May 2017 10:36:58 -0700 Subject: [PATCH 12/59] f2fs: try to freeze in gc and discard threads This allows the gc and discard threads to be frozen.
Cc: stable@vger.kernel.org Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 9 +++++---- fs/f2fs/segment.c | 25 ++++++++++++++++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 81392970fb2d..570480571d72 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -32,13 +32,14 @@ static int gc_thread_func(void *data) wait_ms = gc_th->min_sleep_time; + set_freezable(); do { + wait_event_interruptible_timeout(*wq, + kthread_should_stop() || freezing(current), + msecs_to_jiffies(wait_ms)); + if (try_to_freeze()) continue; - else - wait_event_interruptible_timeout(*wq, - kthread_should_stop(), - msecs_to_jiffies(wait_ms)); if (kthread_should_stop()) break; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3bc36769ec9f..4591239dbae2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -1059,18 +1060,24 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; -repeat: - if (kthread_should_stop()) - return 0; - __issue_discard_cmd(sbi, true); - __wait_discard_cmd(sbi, true); + set_freezable(); - congestion_wait(BLK_RW_SYNC, HZ/50); + do { + wait_event_interruptible(*q, kthread_should_stop() || + freezing(current) || + atomic_read(&dcc->discard_cmd_cnt)); + if (try_to_freeze()) + continue; + if (kthread_should_stop()) + return 0; - wait_event_interruptible(*q, kthread_should_stop() || - atomic_read(&dcc->discard_cmd_cnt)); - goto repeat; + __issue_discard_cmd(sbi, true); + __wait_discard_cmd(sbi, true); + + congestion_wait(BLK_RW_SYNC, HZ/50); + } while (!kthread_should_stop()); + return 0; } #ifdef CONFIG_BLK_DEV_ZONED From dad48e73127ba10279ea33e6dbc8d3905c4d31c0 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Fri, 19 May 2017 15:06:12 +0800 Subject: [PATCH 13/59] f2fs: fix a bug caused by NULL extent 
tree Thread A: Thread B: -f2fs_remount -sbi->mount_opt.opt = 0; <--- -f2fs_iget -do_read_inode -f2fs_init_extent_tree -F2FS_I(inode)->extent_tree is NULL -default_options && parse_options -remount return <--- -f2fs_map_blocks -f2fs_lookup_extent_tree -f2fs_bug_on(sbi, !et); The same problem exists with f2fs_new_inode. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 2f98d7039701..ff2352a0ed15 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -320,7 +320,7 @@ static void __drop_largest_extent(struct inode *inode, } /* return true, if inode page is changed */ -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; @@ -358,6 +358,16 @@ bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) return false; } +bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +{ + bool ret = __f2fs_init_extent_tree(inode, i_ext); + + if (!F2FS_I(inode)->extent_tree) + set_inode_flag(inode, FI_NO_EXTENT); + + return ret; +} + static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { From 963932a93ceb6bdb0d45572056d8daebf2948cd0 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Fri, 19 May 2017 14:42:12 +0800 Subject: [PATCH 14/59] f2fs: combine huge num of discard rb tree consistence checks Came across a hung task caused by a huge number of rb tree traversals while adding discard addrs in cp. This patch combines these consistency checks and moves them to the discard thread.
Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4591239dbae2..7ac3a0c84bfd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -838,7 +838,6 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, dc->len = blkaddr - dc->lstart; dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); modified = true; } @@ -848,16 +847,12 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, di.start + blkaddr + 1 - di.lstart, di.lstart + di.len - 1 - blkaddr, NULL, NULL); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } else { dc->lstart++; dc->len--; dc->start++; dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } } } @@ -918,8 +913,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, prev_dc->di.len += di.len; dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); di = prev_dc->di; tdc = prev_dc; merged = true; @@ -935,16 +928,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); merged = true; } if (!merged) { __insert_discard_tree(sbi, bdev, di.lstart, di.start, di.len, NULL, NULL); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); } next: prev_dc = next_dc; @@ -983,6 +972,8 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) int i, iter = 0; mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { pend_list = 
&dcc->pend_list[i]; From e41e6d75e5010741f01f8aa4f77a5e8a1786652d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:37:00 +0800 Subject: [PATCH 15/59] f2fs: split wio_mutex Split wio_mutex to adjust different temperature bio cache. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 7 ++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f61e095947c8..093d68a7ae47 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -894,7 +894,8 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ + struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; + /* bio ordering for NODE/DATA */ int write_io_size_bits; /* Write IO size bits */ mempool_t *write_io_dummy; /* Dummy pages */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7ac3a0c84bfd..63850e023c10 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2154,7 +2154,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) int err; if (fio->type == NODE || fio->type == DATA) - mutex_lock(&fio->sbi->wio_mutex[fio->type]); + mutex_lock(&fio->sbi->wio_mutex[fio->type][fio->temp]); reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type); @@ -2167,7 +2167,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) } if (fio->type == NODE || fio->type == DATA) - mutex_unlock(&fio->sbi->wio_mutex[fio->type]); + mutex_unlock(&fio->sbi->wio_mutex[fio->type][fio->temp]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index aa451ec9fb80..b700766d0cbf 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1571,7 +1571,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct 
f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i; + int i, j; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -1603,8 +1603,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - mutex_init(&sbi->wio_mutex[NODE]); - mutex_init(&sbi->wio_mutex[DATA]); + for (i = 0; i < NR_PAGE_TYPE - 1; i++) + for (j = HOT; j < NR_TEMP_TYPE; j++) + mutex_init(&sbi->wio_mutex[i][j]); spin_lock_init(&sbi->cp_lock); } From fb830fc5cfc90ba8236921aacb72c6d70bf78af7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:37:01 +0800 Subject: [PATCH 16/59] f2fs: introduce io_list for serialize data/node IOs Serialize data/node IOs by using fifo list instead of mutex lock, it will help to enhance concurrency of f2fs, meanwhile keeping LFS IO semantics. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + fs/f2fs/data.c | 24 ++++++++++++++++++++---- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/gc.c | 3 ++- fs/f2fs/segment.c | 22 +++++++++++++++------- fs/f2fs/super.c | 2 ++ 6 files changed, 46 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 13828f63a871..12559a4b6c24 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -162,6 +162,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op = REQ_OP_READ, .op_flags = sync ? 
(REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, + .in_list = false, }; struct blk_plug plug; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 779a306858a2..2ed90f5db832 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -397,6 +397,20 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) f2fs_bug_on(sbi, is_read_io(fio->op)); + down_write(&io->io_rwsem); +next: + if (fio->in_list) { + spin_lock(&io->io_lock); + if (list_empty(&io->io_list)) { + spin_unlock(&io->io_lock); + goto out_fail; + } + fio = list_first_entry(&io->io_list, + struct f2fs_io_info, list); + list_del(&fio->list); + spin_unlock(&io->io_lock); + } + if (fio->old_blkaddr != NEW_ADDR) verify_block_addr(sbi, fio->old_blkaddr); verify_block_addr(sbi, fio->new_blkaddr); @@ -408,8 +422,6 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) inc_page_count(sbi, WB_DATA_TYPE(bio_page)); - down_write(&io->io_rwsem); - if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) || !__same_bdev(sbi, fio->new_blkaddr, io->bio))) @@ -434,9 +446,13 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio) io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); + + trace_f2fs_submit_page_write(fio->page, fio); + + if (fio->in_list) + goto next; out_fail: up_write(&io->io_rwsem); - trace_f2fs_submit_page_write(fio->page, fio); return err; } @@ -749,7 +765,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, - &sum, CURSEG_WARM_DATA); + &sum, CURSEG_WARM_DATA, NULL, false); set_data_blkaddr(dn); /* update i_size */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 093d68a7ae47..cd777cf30be2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -815,8 +815,10 @@ struct f2fs_io_info { block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct 
page *encrypted_page; /* encrypted page */ + struct list_head list; /* serialize IOs */ bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ + bool in_list; /* indicate fio is in io_list */ }; #define is_read_io(rw) ((rw) == READ) @@ -826,6 +828,8 @@ struct f2fs_bio_info { sector_t last_block_in_bio; /* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ struct rw_semaphore io_rwsem; /* blocking op for bio */ + spinlock_t io_lock; /* serialize DATA/NODE IOs */ + struct list_head io_list; /* track fios */ }; #define FDEV(i) (sbi->devs[i]) @@ -2294,7 +2298,8 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, bool recover_newaddr); void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, int type); + struct f2fs_summary *sum, int type, + struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 570480571d72..fa3d2e2df8e7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -600,6 +600,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, .op = REQ_OP_READ, .op_flags = 0, .encrypted_page = NULL, + .in_list = false, }; struct dnode_of_data dn; struct f2fs_summary sum; @@ -643,7 +644,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, - &sum, CURSEG_COLD_DATA); + &sum, CURSEG_COLD_DATA, NULL, false); fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 63850e023c10..a3766bc1ba4d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2109,7 +2109,8 
@@ static int __get_segment_type(struct f2fs_io_info *fio) void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, int type) + struct f2fs_summary *sum, int type, + struct f2fs_io_info *fio, bool add_list) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2145,6 +2146,17 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (page && IS_NODESEG(type)) fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + if (add_list) { + struct f2fs_bio_info *io; + + INIT_LIST_HEAD(&fio->list); + fio->in_list = true; + io = sbi->write_io[fio->type] + fio->temp; + spin_lock(&io->io_lock); + list_add_tail(&fio->list, &io->io_list); + spin_unlock(&io->io_lock); + } + mutex_unlock(&curseg->curseg_mutex); } @@ -2153,11 +2165,9 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) int type = __get_segment_type(fio); int err; - if (fio->type == NODE || fio->type == DATA) - mutex_lock(&fio->sbi->wio_mutex[fio->type][fio->temp]); reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type); + &fio->new_blkaddr, sum, type, fio, true); /* writeout dirty page into bdev */ err = f2fs_submit_page_write(fio); @@ -2165,9 +2175,6 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) fio->old_blkaddr = fio->new_blkaddr; goto reallocate; } - - if (fio->type == NODE || fio->type == DATA) - mutex_unlock(&fio->sbi->wio_mutex[fio->type][fio->temp]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) @@ -2181,6 +2188,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .new_blkaddr = page->index, .page = page, .encrypted_page = NULL, + .in_list = false, }; if (unlikely(page->index >= MAIN_BLKADDR(sbi))) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b700766d0cbf..d6af34d1e6a8 100644 --- a/fs/f2fs/super.c +++ 
b/fs/f2fs/super.c @@ -1983,6 +1983,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) init_rwsem(&sbi->write_io[i][j].io_rwsem); sbi->write_io[i][j].sbi = sbi; sbi->write_io[i][j].bio = NULL; + spin_lock_init(&sbi->write_io[i][j].io_lock); + INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); } } From 04dfc23006a200865132ef404778a07b896a0280 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:46:43 +0800 Subject: [PATCH 17/59] f2fs: show more info if fail to issue discard Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a3766bc1ba4d..22cca2699095 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -741,7 +741,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, if (dc->error) f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", dc->error); + "Issue discard(%u, %u, %u) failed, ret: %d", + dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } From e31b98215779e66a490471c6ad886ae231316699 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:46:44 +0800 Subject: [PATCH 18/59] f2fs: wake up all waiters in f2fs_submit_discard_endio There could be more than one waiter waiting discard IO completion, so we need use complete_all() instead of complete() in f2fs_submit_discard_endio to avoid hungtask. 
Fixes: ec9895add2c5 ("f2fs: don't hold cmd_lock during waiting discard command") Cc: Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 22cca2699095..24a2d5ab6f45 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -752,7 +752,7 @@ static void f2fs_submit_discard_endio(struct bio *bio) dc->error = bio->bi_error; dc->state = D_DONE; - complete(&dc->wait); + complete_all(&dc->wait); bio_put(bio); } From 6afae6336ac9c19b5956c003e882b1ee4bca2a9d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 May 2017 23:46:45 +0800 Subject: [PATCH 19/59] f2fs: wait discard IO completion without cmd_lock held Wait discard IO completion outside cmd_lock to avoid long latency of holding cmd_lock in IO busy scenario. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 24a2d5ab6f45..a8f4c8146714 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -997,17 +997,34 @@ static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; + bool need_wait; + +next: + need_wait = false; mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (!wait_cond || dc->state == D_DONE) { - if (dc->ref) - continue; + if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); + } else { + dc->ref++; + need_wait = true; + break; } } mutex_unlock(&dcc->cmd_lock); + + if (need_wait) { + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); + 
goto next; + } } /* This should be covered by global mutex, &sit_i->sentry_lock */ From b82a6ea6ec39f1b5ef949a38e334eed0dc29f4d1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 22 May 2017 17:39:43 -0700 Subject: [PATCH 20/59] f2fs: don't bother checking for encryption key in ->mmap() Since only an open file can be mmap'ed, and we only allow open()ing an encrypted file when its key is available, there is no need to check for the key again before permitting each mmap(). This f2fs copy of this code was also broken in that it wouldn't actually have failed if the key was in fact unavailable. Signed-off-by: Eric Biggers Reviewed-by: David Gstir Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8ccbfe53c03c..30fbb4ce6a73 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -415,14 +415,6 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file_inode(file); int err; - if (f2fs_encrypted_inode(inode)) { - err = fscrypt_get_encryption_info(inode); - if (err) - return 0; - if (!f2fs_encrypted_inode(inode)) - return -ENOKEY; - } - /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) From aaebdee8b88225ebe28af2afc60446f9fd7228f9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 22 May 2017 17:39:45 -0700 Subject: [PATCH 21/59] f2fs: don't bother checking for encryption key in ->write_iter() Since only an open file can be written to, and we only allow open()ing an encrypted file when its key is available, there is no need to check for the key again before permitting each ->write_iter(). This code was also broken in that it wouldn't actually have failed if the key was in fact unavailable. 
Signed-off-by: Eric Biggers Reviewed-by: David Gstir Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 30fbb4ce6a73..65915b4ce14b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2340,11 +2340,6 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; ssize_t ret; - if (f2fs_encrypted_inode(inode) && - !fscrypt_has_encryption_key(inode) && - fscrypt_get_encryption_info(inode)) - return -EACCES; - inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { From acfd2810c75b0625897fc119a2d3a9c26cc0e405 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 26 May 2017 17:04:40 +0900 Subject: [PATCH 22/59] f2fs: Do not issue small discards in LFS mode clear_prefree_segments() issues small discards after discarding full segments. These small discards may not be section aligned, so not zone aligned on a zoned block device, causing __f2fs_issue_discard_zone() to fail. Fix this by not issuing small discards for a volume mounted with the BLKZONED feature enabled.
Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a8f4c8146714..de6738b82745 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1337,7 +1337,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (force && len < cpc->trim_minlen) + if (f2fs_sb_mounted_blkzoned(sbi->sb) || + (force && len < cpc->trim_minlen)) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, From d8c4256c17dac3e9ec0c441b81292d5d2044c89f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 1 Jun 2017 15:39:27 -0700 Subject: [PATCH 23/59] f2fs: remove false-positive bug_on For example, f2fs_create - new_node_page is failed - handle_failed_inode - skip to add it into orphan list, since ni.blk_addr == NULL_ADDR : set_inode_flag(inode, FI_FREE_NID) f2fs_evict_inode - EIO due to fault injection - f2fs_bug_on() is triggered So, we don't need to call f2fs_bug_on in this case. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e53c784ab11e..868d71436ebc 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -425,9 +425,10 @@ void f2fs_evict_inode(struct inode *inode) if (is_inode_flag_set(inode, FI_FREE_NID)) { alloc_nid_failed(sbi, inode->i_ino); clear_inode_flag(inode, FI_FREE_NID); + } else { + f2fs_bug_on(sbi, err && + !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); } - f2fs_bug_on(sbi, err && - !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); out_clear: fscrypt_put_encryption_info(inode, NULL); clear_inode(inode); From 68390dd9bd5b2ef7c3ec69c23010b39981e264a4 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Thu, 1 Jun 2017 16:50:10 +0800 Subject: [PATCH 24/59] f2fs: remove the unnecessary cast for PTR_ERR It's not necessary to specify 'int' casting for PTR_ERR. Signed-off-by: Zhang Shengju Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 8f487692c21f..a140c5e3dc54 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -233,7 +233,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size); if (IS_ERR(value)) { clear_inode_flag(inode, FI_ACL_MODE); - return (int)PTR_ERR(value); + return PTR_ERR(value); } } From d4fdf8ba0e5808ba9ad6b44337783bd9935e0982 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 1 Jun 2017 16:43:51 +0800 Subject: [PATCH 25/59] f2fs: fix a panic caused by NULL flush_cmd_control Mount fs with option noflush_merge, boot failed for illegal address fcc in function f2fs_issue_flush: if (!test_opt(sbi, FLUSH_MERGE)) { ret = submit_flush_wait(sbi); atomic_inc(&fcc->issued_flush); -> Here, fcc illegal return ret; } Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/fs/f2fs/segment.c b/fs/f2fs/segment.c index de6738b82745..c1026b78cdfc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -566,6 +566,9 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; + if (!test_opt(sbi, FLUSH_MERGE)) + return err; + init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); @@ -3240,7 +3243,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sm_info->sit_entry_set); - if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { + if (!f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) return err; From 21d3f8e1c3b7996ce239ab6fa82e9f7a8c47d84d Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 1 Jun 2017 11:18:30 -0700 Subject: [PATCH 26/59] f2fs: sanity check size of nat and sit cache Make sure number of entries doesn't exceed max journal size. Cc: stable@vger.kernel.org Signed-off-by: Jin Qian Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c1026b78cdfc..a739803b6513 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2495,6 +2495,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) static int restore_curseg_summaries(struct f2fs_sb_info *sbi) { + struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal; + struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal; int type = CURSEG_HOT_DATA; int err; @@ -2521,6 +2523,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) return err; } + /* sanity check for summary blocks */ + if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + return -EINVAL; + return 0; } From 72fdbe2efe3e42a54e268d2ee2a8c0828d3996e7 Mon Sep 17 00:00:00 2001 From: Fan Li
Date: Fri, 2 Jun 2017 15:45:42 +0800 Subject: [PATCH 27/59] f2fs: simplify the way of calculating next nat address The index of segment which the next nat block is in has only one different bit than the current one, so to get the next nat address, we can simply alter that one bit. Signed-off-by: Fan Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 558048e33cf9..bb53e9955ff2 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -224,11 +224,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, struct f2fs_nm_info *nm_i = NM_I(sbi); block_addr -= nm_i->nat_blkaddr; - if ((block_addr >> sbi->log_blocks_per_seg) % 2) - block_addr -= sbi->blocks_per_seg; - else - block_addr += sbi->blocks_per_seg; - + block_addr ^= 1 << sbi->log_blocks_per_seg; return block_addr + nm_i->nat_blkaddr; } From 5a3a2d83cda82df7f8c306df85647d2c368e829a Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Thu, 18 May 2017 11:06:45 +0800 Subject: [PATCH 28/59] f2fs: dax: fix races between page faults and truncating pages Currently in F2FS, page faults and operations that truncate the pagecache or data blocks, are completely unsynchronized. This can result in page fault faulting in a page into a range that we are changing after truncating, and thus we can end up with a page mapped to disk blocks that will be shortly freed. Filesystem corruption will shortly follow. This patch fixes the problem by creating new rw semaphore i_mmap_sem in f2fs_inode_info and grab it for functions removing blocks from extent tree and for read over page faults. The mechanism is similar to that in ext4.
Signed-off-by: Qiuyang Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 48 +++++++++++++++++++++++++++++++++++++++--------- fs/f2fs/super.c | 1 + 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2ed90f5db832..7d3af48d34a9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1801,8 +1801,10 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) loff_t i_size = i_size_read(inode); if (to > i_size) { + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); truncate_blocks(inode, i_size, true); + up_write(&F2FS_I(inode)->i_mmap_sem); } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cd777cf30be2..da70964cbd74 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -519,6 +519,7 @@ struct f2fs_inode_info { struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ + struct rw_semaphore i_mmap_sem; }; static inline void get_extent_info(struct extent_info *ext, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 65915b4ce14b..ac8b943817e6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -33,6 +33,18 @@ #include "trace.h" #include +static int f2fs_filemap_fault(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + int err; + + down_read(&F2FS_I(inode)->i_mmap_sem); + err = filemap_fault(vmf); + up_read(&F2FS_I(inode)->i_mmap_sem); + + return err; +} + static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; @@ -59,13 +71,14 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_balance_fs(sbi, dn.node_changed); file_update_time(vmf->vma->vm_file); + down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || !PageUptodate(page))) { 
unlock_page(page); err = -EFAULT; - goto out; + goto out_sem; } /* @@ -94,6 +107,8 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); +out_sem: + up_read(&F2FS_I(inode)->i_mmap_sem); out: sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); @@ -101,7 +116,7 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) } static const struct vm_operations_struct f2fs_file_vm_ops = { - .fault = filemap_fault, + .fault = f2fs_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = f2fs_vm_page_mkwrite, }; @@ -700,8 +715,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return -EACCES; if (attr->ia_size <= i_size_read(inode)) { + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); err = f2fs_truncate(inode); + up_write(&F2FS_I(inode)->i_mmap_sem); if (err) return err; } else { @@ -709,7 +726,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) * do not trim all blocks after i_size if target size is * larger than i_size. 
*/ + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); + up_write(&F2FS_I(inode)->i_mmap_sem); /* should convert inline inode here */ if (!f2fs_may_inline_data(inode)) { @@ -852,12 +871,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; + down_write(&F2FS_I(inode)->i_mmap_sem); truncate_inode_pages_range(mapping, blk_start, blk_end - 1); f2fs_lock_op(sbi); ret = truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); + up_write(&F2FS_I(inode)->i_mmap_sem); } } @@ -1096,16 +1117,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; + down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - return ret; + goto out; truncate_pagecache(inode, offset); ret = f2fs_do_collapse(inode, pg_start, pg_end); if (ret) - return ret; + goto out; /* write out all moved pages, if possible */ filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -1118,6 +1140,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1182,9 +1206,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) - return ret; + goto out_sem; truncate_pagecache_range(inode, offset, offset + len - 1); @@ -1198,7 +1223,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start, off_start, off_end - off_start); if (ret) - return ret; + goto out_sem; new_size = max_t(loff_t, new_size, offset + len); } 
else { @@ -1206,7 +1231,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = fill_zero(inode, pg_start++, off_start, PAGE_SIZE - off_start); if (ret) - return ret; + goto out_sem; new_size = max_t(loff_t, new_size, (loff_t)pg_start << PAGE_SHIFT); @@ -1255,6 +1280,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, out: if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) f2fs_i_size_write(inode, new_size); +out_sem: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } @@ -1284,14 +1311,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); + down_write(&F2FS_I(inode)->i_mmap_sem); ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) - return ret; + goto out; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - return ret; + goto out; truncate_pagecache(inode, offset); @@ -1320,6 +1348,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); return ret; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d6af34d1e6a8..ddd2973ffcbf 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -624,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) mutex_init(&fi->inmem_lock); init_rwsem(&fi->dio_rwsem[READ]); init_rwsem(&fi->dio_rwsem[WRITE]); + init_rwsem(&fi->i_mmap_sem); /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; From 2a510c005c9d3fafbaae1a6d27da8256c95542f9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 5 Jun 2017 18:29:06 +0800 Subject: [PATCH 29/59] f2fs: introduce __wait_one_discard_bio In order to avoid copied codes. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a739803b6513..0fa717a47394 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -995,6 +995,20 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) mutex_unlock(&dcc->cmd_lock); } +static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); +} + static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -1019,13 +1033,7 @@ static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) mutex_unlock(&dcc->cmd_lock); if (need_wait) { - wait_for_completion_io(&dc->wait); - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, dc->state != D_DONE); - dc->ref--; - if (!dc->ref) - __remove_discard_cmd(sbi, dc); - mutex_unlock(&dcc->cmd_lock); + __wait_one_discard_bio(sbi, dc); goto next; } } @@ -1049,15 +1057,8 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) } mutex_unlock(&dcc->cmd_lock); - if (need_wait) { - wait_for_completion_io(&dc->wait); - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, dc->state != D_DONE); - dc->ref--; - if (!dc->ref) - __remove_discard_cmd(sbi, dc); - mutex_unlock(&dcc->cmd_lock); - } + if (need_wait) + __wait_one_discard_bio(sbi, dc); } /* This comes from f2fs_put_super */ From d9703d9097d7c97f735a3b7870c52735a4dfa051 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 5 Jun 2017 18:29:07 +0800 Subject: [PATCH 30/59] f2fs: add f2fs_bug_on in __remove_discard_cmd Recently, discard related codes have changed a lot, so 
add f2fs_bug_on to detect potential bug. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0fa717a47394..86a0c1095939 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -739,6 +739,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + f2fs_bug_on(sbi, dc->ref); + if (dc->error == -EOPNOTSUPP) dc->error = 0; From febeca6d375531f2d3244b47bf0eb396180689e0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 5 Jun 2017 18:29:08 +0800 Subject: [PATCH 31/59] f2fs: don't track newly allocated nat entry in list We will never persist newly allocated nat entries during checkpoint(), so we don't need to track such nat entries in nat dirty list in order to avoid: - more latency during traversing dirty list; - sorting nat sets incorrectly due to recording wrong entry_cnt in nat entry set. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d22db8ce0a69..05700e54f91e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -158,9 +158,6 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; - if (get_nat_flag(ne, IS_DIRTY)) - return; - head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); @@ -171,10 +168,18 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } - list_move_tail(&ne->list, &head->entry_list); + + if (get_nat_flag(ne, IS_DIRTY)) + goto refresh_list; + nm_i->dirty_nat_cnt++; head->entry_cnt++; set_nat_flag(ne, IS_DIRTY, true); +refresh_list: + if (nat_get_blkaddr(ne) == NEW_ADDR) + list_del_init(&ne->list); + 
else + list_move_tail(&ne->list, &head->entry_list); } static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, @@ -2423,8 +2428,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, nid_t nid = nat_get_nid(ne); int offset; - if (nat_get_blkaddr(ne) == NEW_ADDR) - continue; + f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR); if (to_journal) { offset = lookup_journal_in_cursum(journal, From 1f258ec13b82d3d947b515a007a748ffcbe29f9a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 7 Jun 2017 11:17:35 +0800 Subject: [PATCH 32/59] f2fs: fix to avoid panic when encountering corrupt node With fault_injection option, generic/361 of fstests will complain with the message below: Call Trace: get_node_page+0x12/0x20 [f2fs] f2fs_iget+0x92/0x7d0 [f2fs] f2fs_fill_super+0x10fb/0x15e0 [f2fs] mount_bdev+0x184/0x1c0 f2fs_mount+0x15/0x20 [f2fs] mount_fs+0x39/0x150 vfs_kern_mount+0x67/0x110 do_mount+0x1bb/0xc70 SyS_mount+0x83/0xd0 do_syscall_64+0x6e/0x160 entry_SYSCALL64_slow_path+0x25/0x25 Since mkfs loop device in f2fs partition can be failed silently due to checkpoint error injection, so root inode page can be corrupted, in order to avoid needless panic, in get_node_page, it's better to leave message and return error to caller, and let fsck repair it later.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 05700e54f91e..f522378224aa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1157,6 +1157,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, f2fs_put_page(page, 1); return ERR_PTR(err); } else if (err == LOCKED_PAGE) { + err = 0; goto page_hit; } @@ -1170,15 +1171,22 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, goto repeat; } - if (unlikely(!PageUptodate(page))) + if (unlikely(!PageUptodate(page))) { + err = -EIO; goto out_err; + } page_hit: if(unlikely(nid != nid_of_node(page))) { - f2fs_bug_on(sbi, 1); + f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " + "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + nid, nid_of_node(page), ino_of_node(page), + ofs_of_node(page), cpver_of_node(page), + next_blkaddr_of_node(page)); ClearPageUptodate(page); + err = -EINVAL; out_err: f2fs_put_page(page, 1); - return ERR_PTR(-EIO); + return ERR_PTR(err); } return page; } From a005774c8dab68d1c3963aee7d9464c3ec400eba Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 9 Jun 2017 06:32:54 +0800 Subject: [PATCH 33/59] f2fs: use proper variable name It is better to use variable name "inline_dentry" instead of "dentry_blk" when data type is "struct f2fs_inline_dentry". This patch has no functional changes, just to make code more readable especially when call the function make_dentry_ptr_inline() and f2fs_convert_inline_dir(). 
Signed-off-by: Tiezhu Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e4c527c4e7d0..e0fd4376e6fb 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -316,12 +316,12 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, int make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { - struct f2fs_inline_dentry *dentry_blk; + struct f2fs_inline_dentry *inline_dentry; struct f2fs_dentry_ptr d; - dentry_blk = inline_data_addr(ipage); + inline_dentry = inline_data_addr(ipage); - make_dentry_ptr_inline(NULL, &d, dentry_blk); + make_dentry_ptr_inline(NULL, &d, inline_dentry); do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); @@ -500,7 +500,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct page *ipage; unsigned int bit_pos; f2fs_hash_t name_hash; - struct f2fs_inline_dentry *dentry_blk = NULL; + struct f2fs_inline_dentry *inline_dentry = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; @@ -510,11 +510,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, if (IS_ERR(ipage)) return PTR_ERR(ipage); - dentry_blk = inline_data_addr(ipage); - bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + inline_dentry = inline_data_addr(ipage); + bit_pos = room_for_filename(&inline_dentry->dentry_bitmap, slots, NR_INLINE_DENTRY); if (bit_pos >= NR_INLINE_DENTRY) { - err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); if (err) return err; err = -EAGAIN; @@ -534,7 +534,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true); name_hash = f2fs_dentry_hash(new_name, NULL); - make_dentry_ptr_inline(NULL, &d, dentry_blk); + 
make_dentry_ptr_inline(NULL, &d, inline_dentry); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); @@ -586,14 +586,14 @@ bool f2fs_empty_inline_dir(struct inode *dir) struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; unsigned int bit_pos = 2; - struct f2fs_inline_dentry *dentry_blk; + struct f2fs_inline_dentry *inline_dentry; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return false; - dentry_blk = inline_data_addr(ipage); - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + inline_dentry = inline_data_addr(ipage); + bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap, NR_INLINE_DENTRY, bit_pos); From b63def9112cd8b91477a06ba5318c8a01ac474f1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Jun 2017 09:21:11 +0200 Subject: [PATCH 34/59] f2fs: Fix a return value in case of error in 'f2fs_fill_super' err must be set to -ENOMEM, otherwise we return 0. Fixes: a912b54d3aaa0 ("f2fs: split bio cache") Signed-off-by: Christophe JAILLET Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ddd2973ffcbf..fe86a7edfa60 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1977,8 +1977,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info), GFP_KERNEL); - if (!sbi->write_io[i]) + if (!sbi->write_io[i]) { + err = -ENOMEM; goto free_options; + } for (j = HOT; j < n; j++) { init_rwsem(&sbi->write_io[i][j].io_rwsem); From 44529f8975b7b93709b1b92be7d027a1d406de8a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 12 Jun 2017 09:44:24 +0800 Subject: [PATCH 35/59] f2fs: fix to show injection rate in ->show_options If fault injection functionality is enabled, show additional injection rate in ->show_options. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fe86a7edfa60..2aa864eae522 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -984,7 +984,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) - seq_puts(seq, ",fault_injection"); + seq_printf(seq, ",fault_injection=%u", + sbi->fault_info.inject_rate); #endif return 0; From 6f6d9fe2ab3fc68d194b18f4d120443326ec524a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 12 Jun 2017 09:44:26 +0800 Subject: [PATCH 36/59] f2fs: fix incorrect document of batched_trim_sections Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index a809f6005f14..3cf9320f492c 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -75,7 +75,7 @@ Contact: "Jaegeuk Kim" Description: Controls the memory footprint used by f2fs. -What: /sys/fs/f2fs//trim_sections +What: /sys/fs/f2fs//batched_trim_sections Date: February 2015 Contact: "Jaegeuk Kim" Description: From 1727f317219bfc60a3e50306d67938ffedb17f8a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 12 Jun 2017 09:44:27 +0800 Subject: [PATCH 37/59] f2fs: fix wrong error number of fill_super This patch fixes incorrect error number in error path of fill_super. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2aa864eae522..227498064a8f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1930,6 +1930,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (f2fs_sb_mounted_blkzoned(sb)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); + err = -EOPNOTSUPP; goto free_sb_buf; } #endif @@ -2003,8 +2004,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (F2FS_IO_SIZE(sbi) > 1) { sbi->write_io_dummy = mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); - if (!sbi->write_io_dummy) + if (!sbi->write_io_dummy) { + err = -ENOMEM; goto free_options; + } } /* get an inode for meta space */ From 56412894b3cee24805e48f380ffa9a5f32cff183 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 12 Jun 2017 22:30:44 +0800 Subject: [PATCH 38/59] f2fs: fix to document fault injection option and sysfs file Commit 73faec4d9935 ("f2fs: add mount option to select fault injection ratio") and Commit 087968974fcd ("f2fs: add fault injection to sysfs") forget to document mount option and sysfs file. This patch fixes to document them. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 12 ++++++++++++ Documentation/filesystems/f2fs.txt | 2 ++ 2 files changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 3cf9320f492c..b09108811ff1 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -112,3 +112,15 @@ Date: January 2016 Contact: "Shuoran Liu" Description: Shows total written kbytes issued to disk. + +What: /sys/fs/f2fs//inject_rate +Date: May 2016 +Contact: "Sheng Yong" +Description: + Controls the injection rate. 
+ +What: /sys/fs/f2fs//inject_type +Date: May 2016 +Contact: "Sheng Yong" +Description: + Controls the injection type. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 4f6531a4701b..8b04a6359530 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -155,6 +155,8 @@ noinline_data Disable the inline data feature, inline data feature is enabled by default. data_flush Enable data flushing before checkpoint in order to persist data of regular and symlink. +fault_injection=%d Enable fault injection in all supported types with + specified injection rate. mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. From a398101aa113351ec973e9e6a3208c7160b7b1fc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 14 Jun 2017 17:39:46 +0800 Subject: [PATCH 39/59] f2fs: clean up sysfs codes Just cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 121 +++++++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 47 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 227498064a8f..6c04a5af455a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -349,6 +349,22 @@ static struct kobj_type f2fs_ktype = { .release = f2fs_sb_release, }; +int __init f2fs_register_sysfs(void) +{ + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) + return -ENOMEM; + return 0; +} + +void f2fs_unregister_sysfs(void) +{ + kset_unregister(f2fs_kset); + remove_proc_entry("fs/f2fs", NULL); +} + void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) 
{ struct va_format vaf; @@ -766,17 +782,23 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) kfree(sbi->devs); } -static void f2fs_put_super(struct super_block *sb) +void f2fs_exit_sysfs(struct f2fs_sb_info *sbi) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); - int i; + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); + remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } - kobject_del(&sbi->s_kobj); +} + +static void f2fs_put_super(struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; stop_gc_thread(sbi); @@ -829,8 +851,8 @@ static void f2fs_put_super(struct super_block *sb) destroy_segment_manager(sbi); kfree(sbi->ckpt); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); + + f2fs_exit_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) @@ -1058,6 +1080,37 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \ F2FS_PROC_FILE_DEF(segment_info); F2FS_PROC_FILE_DEF(segment_bits); +int f2fs_init_sysfs(struct f2fs_sb_info *sbi) +{ + struct super_block *sb = sbi->sb; + int err; + + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) { + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_bits_fops, sb); + } + + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto err_out; + return 0; +err_out: + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + return err; +} + static void 
default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ @@ -2114,22 +2167,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_root_inode; } - if (f2fs_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); - - if (sbi->s_proc) { - proc_create_data("segment_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_info_fops, sb); - proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_bits_fops, sb); - } - - sbi->s_kobj.kset = f2fs_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, - "%s", sb->s_id); + err = f2fs_init_sysfs(sbi); if (err) - goto free_proc; + goto free_root_inode; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -2140,7 +2180,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (bdev_read_only(sb->s_bdev) && !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { err = -EROFS; - goto free_kobj; + goto free_sysfs; } if (need_fsck) @@ -2154,7 +2194,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%d", err); - goto free_kobj; + goto free_sysfs; } } else { err = recover_fsync_data(sbi, true); @@ -2163,7 +2203,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; f2fs_msg(sb, KERN_ERR, "Need to recover fsync data"); - goto free_kobj; + goto free_sysfs; } } skip_recovery: @@ -2178,7 +2218,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) - goto free_kobj; + goto free_sysfs; } kfree(options); @@ -2196,17 +2236,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_update_time(sbi, REQ_TIME); return 0; -free_kobj: +free_sysfs: f2fs_sync_inode_meta(sbi); - kobject_del(&sbi->s_kobj); - 
kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); -free_proc: - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } + f2fs_exit_sysfs(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; @@ -2321,30 +2353,26 @@ static int __init init_f2fs_fs(void) err = create_extent_cache(); if (err) goto free_checkpoint_caches; - f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); - if (!f2fs_kset) { - err = -ENOMEM; + err = f2fs_register_sysfs(); + if (err) goto free_extent_cache; - } err = register_shrinker(&f2fs_shrinker_info); if (err) - goto free_kset; - + goto free_sysfs; err = register_filesystem(&f2fs_fs_type); if (err) goto free_shrinker; err = f2fs_create_root_stats(); if (err) goto free_filesystem; - f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); return 0; free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); -free_kset: - kset_unregister(f2fs_kset); +free_sysfs: + f2fs_unregister_sysfs(); free_extent_cache: destroy_extent_cache(); free_checkpoint_caches: @@ -2361,11 +2389,10 @@ static int __init init_f2fs_fs(void) static void __exit exit_f2fs_fs(void) { - remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); - kset_unregister(f2fs_kset); + f2fs_unregister_sysfs(); destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); From 8ceffcb29e61ba882a011b1e4d73ca03691fdc2e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 14 Jun 2017 17:39:47 +0800 Subject: [PATCH 40/59] f2fs: move sysfs code from super.c to fs/f2fs/sysfs.c Codes related to sysfs and procfs are dispersive and mixed with sb related codes, but actually these codes are independent from others, so split them from super.c, and reorganize and manage them in sysfs.c.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 2 +- fs/f2fs/f2fs.h | 8 ++ fs/f2fs/super.c | 332 -------------------------------------------- fs/f2fs/sysfs.c | 350 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 359 insertions(+), 333 deletions(-) create mode 100644 fs/f2fs/sysfs.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index ca949ea7c02f..a0dc559b1b47 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o -f2fs-y += shrinker.o extent_cache.o +f2fs-y += shrinker.o extent_cache.o sysfs.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index da70964cbd74..dd5449423fd2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2673,6 +2673,14 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi); int __init create_extent_cache(void); void destroy_extent_cache(void); +/* + * sysfs.c + */ +int __init f2fs_register_sysfs(void); +void f2fs_unregister_sysfs(void); +int f2fs_init_sysfs(struct f2fs_sb_info *sbi); +void f2fs_exit_sysfs(struct f2fs_sb_info *sbi); + /* * crypto support */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6c04a5af455a..9081570bc616 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -35,9 +35,7 @@ #define CREATE_TRACE_POINTS #include -static struct proc_dir_entry *f2fs_proc_root; static struct kmem_cache *f2fs_inode_cachep; -static struct kset *f2fs_kset; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -146,225 +144,6 @@ static match_table_t f2fs_tokens = { {Opt_err, NULL}, }; -/* Sysfs support for f2fs */ -enum { - GC_THREAD, /* struct f2fs_gc_thread */ - SM_INFO, /* struct f2fs_sm_info */ - DCC_INFO, /* struct discard_cmd_control */ - NM_INFO, /* struct f2fs_nm_info */ - F2FS_SBI, /* struct f2fs_sb_info */ -#ifdef 
CONFIG_F2FS_FAULT_INJECTION - FAULT_INFO_RATE, /* struct f2fs_fault_info */ - FAULT_INFO_TYPE, /* struct f2fs_fault_info */ -#endif -}; - -struct f2fs_attr { - struct attribute attr; - ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); - ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, - const char *, size_t); - int struct_type; - int offset; -}; - -static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) -{ - if (struct_type == GC_THREAD) - return (unsigned char *)sbi->gc_thread; - else if (struct_type == SM_INFO) - return (unsigned char *)SM_I(sbi); - else if (struct_type == DCC_INFO) - return (unsigned char *)SM_I(sbi)->dcc_info; - else if (struct_type == NM_INFO) - return (unsigned char *)NM_I(sbi); - else if (struct_type == F2FS_SBI) - return (unsigned char *)sbi; -#ifdef CONFIG_F2FS_FAULT_INJECTION - else if (struct_type == FAULT_INFO_RATE || - struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&sbi->fault_info; -#endif - return NULL; -} - -static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->kbytes_written + - BD_PART_WRITTEN(sbi))); -} - -static ssize_t f2fs_sbi_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ - unsigned char *ptr = NULL; - unsigned int *ui; - - ptr = __struct_ptr(sbi, a->struct_type); - if (!ptr) - return -EINVAL; - - ui = (unsigned int *)(ptr + a->offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t f2fs_sbi_store(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned char *ptr; - unsigned long t; - unsigned int *ui; - ssize_t ret; - - ptr = __struct_ptr(sbi, a->struct_type); - if (!ptr) - return -EINVAL; - - ui = (unsigned int *)(ptr + a->offset); - - ret = 
kstrtoul(skip_spaces(buf), 0, &t); - if (ret < 0) - return ret; -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) - return -EINVAL; -#endif - *ui = t; - return count; -} - -static ssize_t f2fs_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); - - return a->show ? a->show(a, sbi, buf) : 0; -} - -static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); - - return a->store ? a->store(a, sbi, buf, len) : 0; -} - -static void f2fs_sb_release(struct kobject *kobj) -{ - struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, - s_kobj); - complete(&sbi->s_kobj_unregister); -} - -#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ -static struct f2fs_attr f2fs_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .struct_type = _struct_type, \ - .offset = _offset \ -} - -#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ - F2FS_ATTR_OFFSET(struct_type, name, 0644, \ - f2fs_sbi_show, f2fs_sbi_store, \ - offsetof(struct struct_name, elname)) - -#define F2FS_GENERAL_RO_ATTR(name) \ -static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) - -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); 
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); -#ifdef CONFIG_F2FS_FAULT_INJECTION -F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); -F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); -#endif -F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); - -#define ATTR_LIST(name) (&f2fs_attr_##name.attr) -static struct attribute *f2fs_attrs[] = { - ATTR_LIST(gc_min_sleep_time), - ATTR_LIST(gc_max_sleep_time), - ATTR_LIST(gc_no_gc_sleep_time), - ATTR_LIST(gc_idle), - ATTR_LIST(reclaim_segments), - ATTR_LIST(max_small_discards), - ATTR_LIST(batched_trim_sections), - ATTR_LIST(ipu_policy), - ATTR_LIST(min_ipu_util), - ATTR_LIST(min_fsync_blocks), - ATTR_LIST(min_hot_blocks), - ATTR_LIST(max_victim_search), - ATTR_LIST(dir_level), - ATTR_LIST(ram_thresh), - ATTR_LIST(ra_nid_pages), - ATTR_LIST(dirty_nats_ratio), - ATTR_LIST(cp_interval), - ATTR_LIST(idle_interval), -#ifdef CONFIG_F2FS_FAULT_INJECTION - ATTR_LIST(inject_rate), - ATTR_LIST(inject_type), -#endif - ATTR_LIST(lifetime_write_kbytes), - NULL, -}; - -static const struct sysfs_ops f2fs_attr_ops = { - .show = 
f2fs_attr_show, - .store = f2fs_attr_store, -}; - -static struct kobj_type f2fs_ktype = { - .default_attrs = f2fs_attrs, - .sysfs_ops = &f2fs_attr_ops, - .release = f2fs_sb_release, -}; - -int __init f2fs_register_sysfs(void) -{ - f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); - - f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); - if (!f2fs_kset) - return -ENOMEM; - return 0; -} - -void f2fs_unregister_sysfs(void) -{ - kset_unregister(f2fs_kset); - remove_proc_entry("fs/f2fs", NULL); -} - void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) { struct va_format vaf; @@ -782,19 +561,6 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) kfree(sbi->devs); } -void f2fs_exit_sysfs(struct f2fs_sb_info *sbi) -{ - kobject_del(&sbi->s_kobj); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); - - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); - } -} - static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -1013,104 +779,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static int segment_info_seq_show(struct seq_file *seq, void *offset) -{ - struct super_block *sb = seq->private; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - unsigned int total_segs = - le32_to_cpu(sbi->raw_super->segment_count_main); - int i; - - seq_puts(seq, "format: segment_type|valid_blocks\n" - "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); - - for (i = 0; i < total_segs; i++) { - struct seg_entry *se = get_seg_entry(sbi, i); - - if ((i % 10) == 0) - seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, false)); - if ((i % 10) == 9 || i == (total_segs - 1)) - seq_putc(seq, '\n'); - else - seq_putc(seq, ' '); - } - - return 0; -} - -static int segment_bits_seq_show(struct seq_file *seq, void 
*offset) -{ - struct super_block *sb = seq->private; - struct f2fs_sb_info *sbi = F2FS_SB(sb); - unsigned int total_segs = - le32_to_cpu(sbi->raw_super->segment_count_main); - int i, j; - - seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" - "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); - - for (i = 0; i < total_segs; i++) { - struct seg_entry *se = get_seg_entry(sbi, i); - - seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, false)); - for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) - seq_printf(seq, " %.2x", se->cur_valid_map[j]); - seq_putc(seq, '\n'); - } - return 0; -} - -#define F2FS_PROC_FILE_DEF(_name) \ -static int _name##_open_fs(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ -} \ - \ -static const struct file_operations f2fs_seq_##_name##_fops = { \ - .open = _name##_open_fs, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -}; - -F2FS_PROC_FILE_DEF(segment_info); -F2FS_PROC_FILE_DEF(segment_bits); - -int f2fs_init_sysfs(struct f2fs_sb_info *sbi) -{ - struct super_block *sb = sbi->sb; - int err; - - if (f2fs_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); - - if (sbi->s_proc) { - proc_create_data("segment_info", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_info_fops, sb); - proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, - &f2fs_seq_segment_bits_fops, sb); - } - - sbi->s_kobj.kset = f2fs_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, - "%s", sb->s_id); - if (err) - goto err_out; - return 0; -err_out: - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } - return err; -} - static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c 
new file mode 100644 index 000000000000..714a3e47bbe8 --- /dev/null +++ b/fs/f2fs/sysfs.c @@ -0,0 +1,350 @@ +/* + * f2fs sysfs interface + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Copyright (c) 2017 Chao Yu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +#include "f2fs.h" +#include "segment.h" +#include "gc.h" + +static struct proc_dir_entry *f2fs_proc_root; +static struct kset *f2fs_kset; + +/* Sysfs support for f2fs */ +enum { + GC_THREAD, /* struct f2fs_gc_thread */ + SM_INFO, /* struct f2fs_sm_info */ + DCC_INFO, /* struct discard_cmd_control */ + NM_INFO, /* struct f2fs_nm_info */ + F2FS_SBI, /* struct f2fs_sb_info */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + FAULT_INFO_RATE, /* struct f2fs_fault_info */ + FAULT_INFO_TYPE, /* struct f2fs_fault_info */ +#endif +}; + +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int struct_type; + int offset; +}; + +static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) +{ + if (struct_type == GC_THREAD) + return (unsigned char *)sbi->gc_thread; + else if (struct_type == SM_INFO) + return (unsigned char *)SM_I(sbi); + else if (struct_type == DCC_INFO) + return (unsigned char *)SM_I(sbi)->dcc_info; + else if (struct_type == NM_INFO) + return (unsigned char *)NM_I(sbi); + else if (struct_type == F2FS_SBI) + return (unsigned char *)sbi; +#ifdef CONFIG_F2FS_FAULT_INJECTION + else if (struct_type == FAULT_INFO_RATE || + struct_type == FAULT_INFO_TYPE) + return (unsigned char *)&sbi->fault_info; +#endif + return NULL; +} + +static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct 
super_block *sb = sbi->sb; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(sbi->kbytes_written + + BD_PART_WRITTEN(sbi))); +} + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + unsigned char *ptr = NULL; + unsigned int *ui; + + ptr = __struct_ptr(sbi, a->struct_type); + if (!ptr) + return -EINVAL; + + ui = (unsigned int *)(ptr + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned char *ptr; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + ptr = __struct_ptr(sbi, a->struct_type); + if (!ptr) + return -EINVAL; + + ui = (unsigned int *)(ptr + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) + return -EINVAL; +#endif + *ui = t; + return count; +} + +static ssize_t f2fs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? 
a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .struct_type = _struct_type, \ + .offset = _offset \ +} + +#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ + F2FS_ATTR_OFFSET(struct_type, name, 0644, \ + f2fs_sbi_show, f2fs_sbi_store, \ + offsetof(struct struct_name, elname)) + +#define F2FS_GENERAL_RO_ATTR(name) \ +static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) + +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, 
cp_interval, interval_time[CP_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +#ifdef CONFIG_F2FS_FAULT_INJECTION +F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); +F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); +#endif +F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), + ATTR_LIST(reclaim_segments), + ATTR_LIST(max_small_discards), + ATTR_LIST(batched_trim_sections), + ATTR_LIST(ipu_policy), + ATTR_LIST(min_ipu_util), + ATTR_LIST(min_fsync_blocks), + ATTR_LIST(min_hot_blocks), + ATTR_LIST(max_victim_search), + ATTR_LIST(dir_level), + ATTR_LIST(ram_thresh), + ATTR_LIST(ra_nid_pages), + ATTR_LIST(dirty_nats_ratio), + ATTR_LIST(cp_interval), + ATTR_LIST(idle_interval), +#ifdef CONFIG_F2FS_FAULT_INJECTION + ATTR_LIST(inject_rate), + ATTR_LIST(inject_type), +#endif + ATTR_LIST(lifetime_write_kbytes), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + +static int segment_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = + le32_to_cpu(sbi->raw_super->segment_count_main); + int i; + + seq_puts(seq, "format: segment_type|valid_blocks\n" + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); + + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + + if ((i % 10) == 0) + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d|%-3u", se->type, + get_valid_blocks(sbi, i, false)); + if ((i % 10) == 9 || i == (total_segs - 1)) + 
seq_putc(seq, '\n'); + else + seq_putc(seq, ' '); + } + + return 0; +} + +static int segment_bits_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = + le32_to_cpu(sbi->raw_super->segment_count_main); + int i, j; + + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" + "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); + + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + + seq_printf(seq, "%-10d", i); + seq_printf(seq, "%d|%-3u|", se->type, + get_valid_blocks(sbi, i, false)); + for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) + seq_printf(seq, " %.2x", se->cur_valid_map[j]); + seq_putc(seq, '\n'); + } + return 0; +} + +#define F2FS_PROC_FILE_DEF(_name) \ +static int _name##_open_fs(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, _name##_seq_show, PDE_DATA(inode)); \ +} \ + \ +static const struct file_operations f2fs_seq_##_name##_fops = { \ + .open = _name##_open_fs, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +}; + +F2FS_PROC_FILE_DEF(segment_info); +F2FS_PROC_FILE_DEF(segment_bits); + +int __init f2fs_register_sysfs(void) +{ + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) + return -ENOMEM; + return 0; +} + +void f2fs_unregister_sysfs(void) +{ + kset_unregister(f2fs_kset); + remove_proc_entry("fs/f2fs", NULL); +} + +int f2fs_init_sysfs(struct f2fs_sb_info *sbi) +{ + struct super_block *sb = sbi->sb; + int err; + + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) { + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_bits_fops, sb); + } + + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = 
kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto err_out; + return 0; +err_out: + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + return err; +} + +void f2fs_exit_sysfs(struct f2fs_sb_info *sbi) +{ + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry("segment_bits", sbi->s_proc); + remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); + } +} From 67773a1fbdcb5be4a0490b1dd2a5975784ef40df Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 13 Jun 2017 16:47:54 -0700 Subject: [PATCH 41/59] f2fs: require key for truncate(2) of encrypted file Currently, filesystems allow truncate(2) on an encrypted file without the encryption key. However, it's impossible to correctly handle the case where the size being truncated to is not a multiple of the filesystem block size, because that would require decrypting the final block, zeroing the part beyond i_size, then encrypting the block. As other modifications to encrypted file contents are prohibited without the key, just prohibit truncate(2) as well, making it fail with ENOKEY. 
Signed-off-by: Eric Biggers Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ac8b943817e6..61ee029d7e48 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -710,9 +710,13 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { - if (f2fs_encrypted_inode(inode) && - fscrypt_get_encryption_info(inode)) - return -EACCES; + if (f2fs_encrypted_inode(inode)) { + err = fscrypt_get_encryption_info(inode); + if (err) + return err; + if (!fscrypt_has_encryption_key(inode)) + return -ENOKEY; + } if (attr->ia_size <= i_size_read(inode)) { down_write(&F2FS_I(inode)->i_mmap_sem); From 663f387b713089463e37761bfa2561972c7f45ff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 14 Jun 2017 23:00:55 +0800 Subject: [PATCH 42/59] f2fs: set CP_TRIMMED_FLAG correctly Don't set CP_TRIMMED_FLAG for a non-zoned block device or a device that does not support discard; this avoids triggering an unneeded checkpoint for that kind of device.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9081570bc616..8e39b850bfc0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -587,7 +587,7 @@ static void f2fs_put_super(struct super_block *sb) /* be sure to wait for any on-going discard commands */ f2fs_wait_discard_bios(sbi); - if (!sbi->discard_blks) { + if (f2fs_discard_en(sbi) && !sbi->discard_blks) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; From 0eb0adadf2e49d82bc4ecd65ec3bb69251f7564c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 14 Jun 2017 23:00:56 +0800 Subject: [PATCH 43/59] f2fs: measure inode.i_blocks as generic filesystem Both in memory and on disk, generic filesystems record i_blocks as a count of 512-byte sectors, and VFS submodules such as disk quota follow this rule, but f2fs records it as a count of 4096-byte blocks. Because of this difference, once we use the disk quota functions that increment/decrement i_blocks, the i_blocks of f2fs becomes inconsistent between memory and disk. In order to resolve this issue, this patch makes the in-memory i_blocks of f2fs record the sector count instead of the block count, while leaving the on-disk i_blocks recording the block count.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 23 +++++++++++++---------- fs/f2fs/file.c | 1 - fs/f2fs/inode.c | 5 +++-- fs/f2fs/node.c | 2 +- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dd5449423fd2..91db1d07f9f8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1352,10 +1352,10 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) */ static inline int F2FS_HAS_BLOCKS(struct inode *inode) { - if (F2FS_I(inode)->i_xattr_nid) - return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1; - else - return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; + block_t xattr_block = F2FS_I(inode)->i_xattr_nid ? 1 : 0; + + return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) > + (F2FS_DEFAULT_ALLOCATED_BLOCKS + xattr_block); } static inline bool f2fs_has_xattr_block(unsigned int ofs) @@ -1363,7 +1363,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } -static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool); +static inline void f2fs_i_blocks_write(struct inode *, block_t, bool); static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { @@ -1401,11 +1401,13 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, - blkcnt_t count) + block_t count) { + blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK; + spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(sbi, inode->i_blocks < count); + f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); f2fs_i_blocks_write(inode, count, false); @@ -1856,13 +1858,14 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc) } static inline void f2fs_i_blocks_write(struct inode *inode, - 
blkcnt_t diff, bool add) + block_t diff, bool add) { bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); + blkcnt_t sectors = diff << F2FS_LOG_SECTORS_PER_BLOCK; - inode->i_blocks = add ? inode->i_blocks + diff : - inode->i_blocks - diff; + inode->i_blocks = add ? inode->i_blocks + sectors : + inode->i_blocks - sectors; f2fs_mark_inode_dirty_sync(inode, true); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 61ee029d7e48..7ea63d84a699 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -665,7 +665,6 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, STATX_ATTR_NODUMP); generic_fillattr(inode, stat); - stat->blocks <<= 3; return 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 868d71436ebc..1ff5bd418d87 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -16,6 +16,7 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include @@ -129,7 +130,7 @@ static int do_read_inode(struct inode *inode) i_gid_write(inode, le32_to_cpu(ri->i_gid)); set_nlink(inode, le32_to_cpu(ri->i_links)); inode->i_size = le64_to_cpu(ri->i_size); - inode->i_blocks = le64_to_cpu(ri->i_blocks); + inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks)); inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); @@ -267,7 +268,7 @@ int update_inode(struct inode *inode, struct page *node_page) ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); ri->i_size = cpu_to_le64(i_size_read(inode)); - ri->i_blocks = cpu_to_le64(inode->i_blocks); + ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks)); if (et) { read_lock(&et->lock); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f522378224aa..f6f46be139f4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1011,7 +1011,7 @@ int remove_inode_page(struct inode *inode) /* 0 is possible, after f2fs_new_inode() has 
failed */ f2fs_bug_on(F2FS_I_SB(inode), - inode->i_blocks != 0 && inode->i_blocks != 1); + inode->i_blocks != 0 && inode->i_blocks != 8); /* will put inode & node pages */ truncate_node(&dn); From a9bcf9bcd01499001834273ac1114ec76668f048 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Jun 2017 08:05:32 -0700 Subject: [PATCH 44/59] f2fs: don't need to check encrypted inode for partial truncation The cache_only is always false, if inode is encrypted. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7ea63d84a699..6a201c61eef5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -534,8 +534,10 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, truncate_out: f2fs_wait_on_page_writeback(page, DATA, true); zero_user(page, offset, PAGE_SIZE - offset); - if (!cache_only || !f2fs_encrypted_inode(inode) || - !S_ISREG(inode->i_mode)) + + /* An encrypted inode should have a key and truncate the last page. */ + f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode)); + if (!cache_only) set_page_dirty(page); f2fs_put_page(page, 1); return 0; From 34dc77ad74368707f0f51f42536e38e6ef30ff22 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 15 Jun 2017 16:44:42 -0700 Subject: [PATCH 45/59] f2fs: add ioctl to do gc with target block address This patch adds f2fs_ioc_gc_range() to move blocks located in the given range. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 91db1d07f9f8..c27a6264d9bf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -303,6 +303,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, struct f2fs_move_range) #define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ struct f2fs_flush_device) +#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ + struct f2fs_gc_range) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -327,6 +329,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION #endif +struct f2fs_gc_range { + u32 sync; + u64 start; + u64 len; +}; + struct f2fs_defragment { u64 start; u64 len; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6a201c61eef5..3f56b27e761b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1911,6 +1911,50 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) return ret; } +static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_gc_range range; + u64 end; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, + sizeof(range))) + return -EFAULT; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + end = range.start + range.len; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) + return -EINVAL; +do_more: + if (!range.sync) { + if (!mutex_trylock(&sbi->gc_mutex)) { + ret = -EBUSY; + goto out; + } + } else { + mutex_lock(&sbi->gc_mutex); + } + + ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, 
range.start)); + range.start += sbi->blocks_per_seg; + if (range.start <= end) + goto do_more; +out: + mnt_drop_write_file(filp); + return ret; +} + static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2355,6 +2399,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_get_encryption_pwsalt(filp, arg); case F2FS_IOC_GARBAGE_COLLECT: return f2fs_ioc_gc(filp, arg); + case F2FS_IOC_GARBAGE_COLLECT_RANGE: + return f2fs_ioc_gc_range(filp, arg); case F2FS_IOC_WRITE_CHECKPOINT: return f2fs_ioc_write_checkpoint(filp, arg); case F2FS_IOC_DEFRAGMENT: @@ -2423,6 +2469,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_ENCRYPTION_PWSALT: case F2FS_IOC_GET_ENCRYPTION_POLICY: case F2FS_IOC_GARBAGE_COLLECT: + case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: case F2FS_IOC_MOVE_RANGE: From 0cc091d0c8c34092c471fb5ae7335d075d08c324 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Jun 2017 20:55:55 -0700 Subject: [PATCH 46/59] f2fs: report # of free inodes more precisely If the partition is small, we don't need to report total # of inodes including hidden free nodes. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8e39b850bfc0..3da6fb276f8b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -680,6 +680,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) struct f2fs_sb_info *sbi = F2FS_SB(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); block_t total_count, user_block_count, start_count, ovp_count; + u64 avail_node_count; total_count = le64_to_cpu(sbi->raw_super->block_count); user_block_count = sbi->user_block_count; @@ -692,9 +693,16 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi); - buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; - buf->f_ffree = min(buf->f_files - valid_node_count(sbi), - buf->f_bavail); + avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + + if (avail_node_count > user_block_count) { + buf->f_files = user_block_count; + buf->f_ffree = buf->f_bavail; + } else { + buf->f_files = avail_node_count; + buf->f_ffree = min(avail_node_count - valid_node_count(sbi), + buf->f_bavail); + } buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; From d871cd046f1a5ae816c836cf114d57288bcb00b2 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Sat, 24 Jun 2017 15:57:19 +0800 Subject: [PATCH 47/59] f2fs: avoid redundant f2fs_flush after remount create_flush_cmd_control will create redundant issue_flush_thread after each remount with flush_merge option. 
Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 86a0c1095939..7637033ef87b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -555,6 +555,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) if (SM_I(sbi)->fcc_info) { fcc = SM_I(sbi)->fcc_info; + if (fcc->f2fs_issue_flush) + return err; goto init_thread; } From daeb433e42de97c79622f58681972200eec1d8da Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Jun 2017 16:24:41 +0800 Subject: [PATCH 48/59] f2fs: introduce reserved_blocks in sysfs In this patch, we add a new sysfs interface, with it, we can control number of reserved blocks in system which could not be used by user, it enable f2fs to let user to configure for adjusting over-provision ratio dynamically instead of changing it by mkfs. So we can expect it will help to reserve more free space for relieving GC in both filesystem and flash device. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/f2fs.h | 13 +++++++++---- fs/f2fs/super.c | 4 +++- fs/f2fs/sysfs.c | 16 +++++++++++++++- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index b09108811ff1..84c606fb3ca4 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -124,3 +124,9 @@ Date: May 2016 Contact: "Sheng Yong" Description: Controls the injection type. + +What: /sys/fs/f2fs//reserved_blocks +Date: June 2017 +Contact: "Chao Yu" +Description: + Controls current reserved blocks in system. 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c27a6264d9bf..e15d55ccc4bc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -966,6 +966,8 @@ struct f2fs_sb_info { block_t total_valid_block_count; /* # of valid blocks */ block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ + block_t reserved_blocks; /* configurable reserved blocks */ + u32 s_next_generation; /* for NFS support */ /* # of pages, see count_type */ @@ -1376,6 +1378,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { blkcnt_t diff; + block_t avail_user_block_count; #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { @@ -1391,10 +1394,11 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); - if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) { - diff = sbi->total_valid_block_count - sbi->user_block_count; + avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; + if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + diff = sbi->total_valid_block_count - avail_user_block_count; *count -= diff; - sbi->total_valid_block_count = sbi->user_block_count; + sbi->total_valid_block_count = avail_user_block_count; if (!*count) { spin_unlock(&sbi->stat_lock); percpu_counter_sub(&sbi->alloc_valid_block_count, diff); @@ -1556,7 +1560,8 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count > sbi->user_block_count)) { + if (unlikely(valid_block_count + sbi->reserved_blocks > + sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); return false; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3da6fb276f8b..c45bac6f1795 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -691,7 +691,8 @@ static int 
f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; - buf->f_bavail = user_block_count - valid_user_blocks(sbi); + buf->f_bavail = user_block_count - valid_user_blocks(sbi) - + sbi->reserved_blocks; avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; @@ -1768,6 +1769,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->total_valid_block_count = le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->reserved_blocks = 0; for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 714a3e47bbe8..9adc202fcd6f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -30,6 +30,7 @@ enum { FAULT_INFO_RATE, /* struct f2fs_fault_info */ FAULT_INFO_TYPE, /* struct f2fs_fault_info */ #endif + RESERVED_BLOCKS, }; struct f2fs_attr { @@ -51,7 +52,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return (unsigned char *)SM_I(sbi)->dcc_info; else if (struct_type == NM_INFO) return (unsigned char *)NM_I(sbi); - else if (struct_type == F2FS_SBI) + else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS) return (unsigned char *)sbi; #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || @@ -111,6 +112,17 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) return -EINVAL; #endif + if (a->struct_type == RESERVED_BLOCKS) { + spin_lock(&sbi->stat_lock); + if ((unsigned long)sbi->total_valid_block_count + t > + (unsigned long)sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return -EINVAL; + } + *ui = t; + spin_unlock(&sbi->stat_lock); + return count; + } *ui = t; return count; } @@ -165,6 +177,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, 
no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); @@ -208,6 +221,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(lifetime_write_kbytes), + ATTR_LIST(reserved_blocks), NULL, }; From cce1325247b9faafc520c5789fe60feef1fd7092 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Jun 2017 23:17:45 +0800 Subject: [PATCH 49/59] f2fs: stop gc/discard thread in prior during umount This patch resolves kernel panic for xfstests/081, caused by recent f2fs_bug_on f2fs: add f2fs_bug_on in __remove_discard_cmd For fixing, we will stop gc/discard thread in prior in ->kill_sb in order to avoid referring and releasing race among them. 
Signed-off-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 19 +++++++++++++------ fs/f2fs/super.c | 7 ++++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e15d55ccc4bc..abf9eea67966 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2293,6 +2293,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); +void stop_discard_thread(struct f2fs_sb_info *sbi); void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7637033ef87b..6eaa98ea8ec6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1065,6 +1065,18 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) __wait_one_discard_bio(sbi, dc); } +void stop_discard_thread(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + if (dcc && dcc->f2fs_issue_discard) { + struct task_struct *discard_thread = dcc->f2fs_issue_discard; + + dcc->f2fs_issue_discard = NULL; + kthread_stop(discard_thread); + } +} + /* This comes from f2fs_put_super */ void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { @@ -1422,12 +1434,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return; - if (dcc->f2fs_issue_discard) { - struct task_struct *discard_thread = dcc->f2fs_issue_discard; - - dcc->f2fs_issue_discard = NULL; - kthread_stop(discard_thread); - } + stop_discard_thread(sbi); kfree(dcc); SM_I(sbi)->dcc_info = NULL; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c45bac6f1795..f27c141cd8aa 100644 --- a/fs/f2fs/super.c 
+++ b/fs/f2fs/super.c @@ -566,8 +566,6 @@ static void f2fs_put_super(struct super_block *sb) struct f2fs_sb_info *sbi = F2FS_SB(sb); int i; - stop_gc_thread(sbi); - /* prevent remaining shrinker jobs */ mutex_lock(&sbi->umount_mutex); @@ -1976,8 +1974,11 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, static void kill_f2fs_super(struct super_block *sb) { - if (sb->s_root) + if (sb->s_root) { set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); + stop_gc_thread(F2FS_SB(sb)); + stop_discard_thread(F2FS_SB(sb)); + } kill_block_super(sb); } From 6915ea9d8dd8690570fe4a9864b898447a4f3da0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 30 Jun 2017 17:19:02 +0800 Subject: [PATCH 50/59] f2fs: introduce __check_sit_bitmap After we introduce discard thread, discard command can be issued concurrently with data allocating, this patch adds new function to check sit bitmap to ensure that userdata was invalid in which on-going discard command covered. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6eaa98ea8ec6..4c246e351103 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -763,6 +763,30 @@ static void f2fs_submit_discard_endio(struct bio *bio) bio_put(bio); } +void __check_sit_bitmap(struct f2fs_sb_info *sbi, + block_t start, block_t end) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct seg_entry *sentry; + unsigned int segno; + block_t blk = start; + unsigned long offset, size, max_blocks = sbi->blocks_per_seg; + unsigned long *map; + + while (blk < end) { + segno = GET_SEGNO(sbi, blk); + sentry = get_seg_entry(sbi, segno); + offset = GET_BLKOFF_FROM_SEG0(sbi, blk); + + size = min((unsigned long)(end - blk), max_blocks); + map = (unsigned long *)(sentry->cur_valid_map); + offset = __find_rev_next_bit(map, size, offset); + f2fs_bug_on(sbi, offset != size); + blk += size; + } +#endif +} + /* this function is copied
from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) @@ -790,6 +814,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_opf |= REQ_SYNC; submit_bio(bio); list_move_tail(&dc->list, &dcc->wait_list); + __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); } } else { __remove_discard_cmd(sbi, dc); From 0771fcc71c0c28bf31ac5c2c863b9f0de0fdf00d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Jun 2017 23:20:45 +0800 Subject: [PATCH 51/59] f2fs: skip ->writepages for {meta,node}_inode during recovery Skip ->writepages in prior to ->writepage for {meta,node}_inode during recovery, hence unneeded loop in ->writepages can be avoided. Moreover, check SBI_POR_DOING earlier while writebacking pages. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/data.c | 13 +++++++------ fs/f2fs/node.c | 3 +++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 12559a4b6c24..954917d582f8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -269,6 +269,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); long diff, written; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + /* collect a number of dirty meta pages and write together */ if (wbc->for_kupdate || get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7d3af48d34a9..cffcfa8d2571 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1492,6 +1492,9 @@ static int __write_data_page(struct page *page, bool *submitted, trace_f2fs_writepage(page, DATA); + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto redirty_out; + if (page->index < end_index) goto write; @@ -1505,8 +1508,6 @@ static int __write_data_page(struct page *page, bool *submitted, zero_user_segment(page,
offset, PAGE_SIZE); write: - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - goto redirty_out; if (f2fs_is_drop_cache(inode)) goto out; /* we should not write 0'th page having journal header */ @@ -1754,6 +1755,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) return 0; + /* during POR, we don't need to trigger writepage at all. */ + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && available_free_memory(sbi, DIRTY_DENTS)) @@ -1763,10 +1768,6 @@ static int f2fs_write_data_pages(struct address_space *mapping, if (is_inode_flag_set(inode, FI_DO_DEFRAG)) goto skip_write; - /* during POR, we don't need to trigger writepage at all. */ - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - goto skip_write; - trace_f2fs_writepages(mapping->host, wbc, DATA); /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f6f46be139f4..fd57ffd88508 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1687,6 +1687,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct blk_plug plug; long diff; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + /* balancing f2fs's metadata in background */ f2fs_balance_fs_bg(sbi); From d58dfb75056c5f732a0b83c54d22c99b4edc947a Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 26 Jun 2017 10:41:35 +0800 Subject: [PATCH 52/59] f2fs: do not set LOST_PINO for newly created dir Since directories will be written back with checkpoint and fsync a directory will always write CP, there is no need to set LOST_PINO after creating a directory. 
Signed-off-by: Sheng Yong Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 94756f55a97e..37f9c7f55605 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -415,7 +415,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, * We lost i_pino from now on. */ if (is_inode_flag_set(inode, FI_INC_LINK)) { - file_lost_pino(inode); + if (!S_ISDIR(inode->i_mode)) + file_lost_pino(inode); /* * If link the tmpfile to alias through linkat path, * we should remove this inode from orphan list. From b855bf0e1640aa4cf2d1eef056eebcd43e0d1f5e Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 26 Jun 2017 10:41:36 +0800 Subject: [PATCH 53/59] f2fs: do not set LOST_PINO for renamed dir After renaming a directory, fsck could detect unmatched pino. The scenario can be reproduced as the following: $ mkdir /bar/subbar /foo $ rename /bar/subbar /foo Then fsck will report: [ASSERT] (__chk_dots_dentries:1182) --> Bad inode number[0x3] for '..', parent parent ino is [0x4] Rename sets LOST_PINO for old_inode. However, the flag cannot be cleared, since dir is written back with CP. So, let's get rid of LOST_PINO for a renamed dir and fix the pino directly at the end of rename. 
Signed-off-by: Sheng Yong Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c31b40e5f9cf..b75dc2f4ad57 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -772,7 +772,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); + if (!old_dir_entry || whiteout) + file_lost_pino(old_inode); + else + F2FS_I(old_inode)->i_pino = new_dir->i_ino; up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); From 6ac851ba895dce4b85b7adfa8ddb1fd25637e70a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Jul 2017 12:17:24 +0800 Subject: [PATCH 54/59] Revert "f2fs: fix to clean previous mount option when remount_fs" Don't clear old mount option before parse new option during ->remount_fs like other generic filesystems. This reverts commit 26666c8a4366debae30ae37d0688b2bec92d196a. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f27c141cd8aa..af472f7968d0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -846,7 +846,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); } - sbi->mount_opt.opt = 0; default_options(sbi); /* parse mount options */ From 000519f27866afdfde020d097b76cf2c4038595e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 Jul 2017 01:11:31 +0800 Subject: [PATCH 55/59] f2fs: don't count inode block in in-memory inode.i_blocks Previously, we count all inode consumed blocks including inode block, xattr block, index block, data block into i_blocks, for other generic filesystems, they won't count inode block into i_blocks, so for userspace applications or quota system, they may detect incorrect block count according to i_blocks value in inode. 
This patch changes to count all blocks into inode.i_blocks excluding inode block, for on-disk i_blocks, we keep counting inode block for backward compatibility. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 22 ++++++++++++---------- fs/f2fs/inode.c | 4 ++-- fs/f2fs/node.c | 16 ++++++---------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index abf9eea67966..7bd0a45dd081 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1355,8 +1355,6 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) return 0; } -#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 - /* * Check whether the inode has blocks or not */ @@ -1364,8 +1362,7 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode) { block_t xattr_block = F2FS_I(inode)->i_xattr_nid ? 1 : 0; - return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) > - (F2FS_DEFAULT_ALLOCATED_BLOCKS + xattr_block); + return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) > xattr_block; } static inline bool f2fs_has_xattr_block(unsigned int ofs) @@ -1552,7 +1549,7 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) } static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode) + struct inode *inode, bool is_inode) { block_t valid_block_count; unsigned int valid_node_count; @@ -1572,8 +1569,12 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, return false; } - if (inode) - f2fs_i_blocks_write(inode, 1, true); + if (inode) { + if (is_inode) + f2fs_mark_inode_dirty_sync(inode, true); + else + f2fs_i_blocks_write(inode, 1, true); + } sbi->total_valid_node_count++; sbi->total_valid_block_count++; @@ -1584,15 +1585,16 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, } static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode) + struct inode *inode, bool is_inode) { spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, !sbi->total_valid_block_count); 
f2fs_bug_on(sbi, !sbi->total_valid_node_count); - f2fs_bug_on(sbi, !inode->i_blocks); + f2fs_bug_on(sbi, !is_inode && !inode->i_blocks); - f2fs_i_blocks_write(inode, 1, false); + if (!is_inode) + f2fs_i_blocks_write(inode, 1, false); sbi->total_valid_node_count--; sbi->total_valid_block_count--; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1ff5bd418d87..e42a7a8805dc 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -130,7 +130,7 @@ static int do_read_inode(struct inode *inode) i_gid_write(inode, le32_to_cpu(ri->i_gid)); set_nlink(inode, le32_to_cpu(ri->i_links)); inode->i_size = le64_to_cpu(ri->i_size); - inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks)); + inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1); inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); @@ -268,7 +268,7 @@ int update_inode(struct inode *inode, struct page *node_page) ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); ri->i_size = cpu_to_le64(i_size_read(inode)); - ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks)); + ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); if (et) { read_lock(&et->lock); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index fd57ffd88508..b9f14ba6441f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -678,15 +678,11 @@ static void truncate_node(struct dnode_of_data *dn) struct node_info ni; get_node_info(sbi, dn->nid, &ni); - if (dn->inode->i_blocks == 0) { - f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); - goto invalidate; - } f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); /* Deallocate node address */ invalidate_blocks(sbi, ni.blk_addr); - dec_valid_node_count(sbi, dn->inode); + dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); if (dn->nid == dn->inode->i_ino) { @@ -694,7 +690,7 @@ static void truncate_node(struct dnode_of_data *dn) dec_valid_inode_count(sbi); 
f2fs_inode_synced(dn->inode); } -invalidate: + clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -1044,7 +1040,7 @@ struct page *new_node_page(struct dnode_of_data *dn, if (!page) return ERR_PTR(-ENOMEM); - if (unlikely(!inc_valid_node_count(sbi, dn->inode))) { + if (unlikely(!inc_valid_node_count(sbi, dn->inode, !ofs))) { err = -ENOSPC; goto fail; } @@ -2207,14 +2203,14 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) get_node_info(sbi, prev_xnid, &ni); f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); invalidate_blocks(sbi, ni.blk_addr); - dec_valid_node_count(sbi, inode); + dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); recover_xnid: /* 2: update xattr nid in inode */ remove_free_nid(sbi, new_xnid); f2fs_i_xnid_write(inode, new_xnid); - if (unlikely(!inc_valid_node_count(sbi, inode))) + if (unlikely(!inc_valid_node_count(sbi, inode, false))) f2fs_bug_on(sbi, 1); update_inode_page(inode); @@ -2272,7 +2268,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) new_ni = old_ni; new_ni.ino = ino; - if (unlikely(!inc_valid_node_count(sbi, NULL))) + if (unlikely(!inc_valid_node_count(sbi, NULL, true))) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); From ff1048e7dffe0582a50e2eaf90e13fc76ea8493d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Jul 2017 14:46:01 -0700 Subject: [PATCH 56/59] f2fs: relax migratepage for atomic written page In order to avoid lock contention for atomic written pages, we'd better give EBUSY in f2fs_migrate_page when mode is asynchronous. We expect it will be released soon as transaction commits. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cffcfa8d2571..72fc866cad19 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2203,8 +2203,12 @@ int f2fs_migrate_page(struct address_space *mapping, BUG_ON(PageWriteback(page)); /* migrating an atomic written page is safe with the inmem_lock hold */ - if (atomic_written && !mutex_trylock(&fi->inmem_lock)) - return -EAGAIN; + if (atomic_written) { + if (mode != MIGRATE_SYNC) + return -EBUSY; + if (!mutex_trylock(&fi->inmem_lock)) + return -EAGAIN; + } /* * A reference is expected if PagePrivate set when move mapping, From d1aa245354ae4605d1183f542ed8d45811c439f6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 7 Jul 2017 14:10:15 +0800 Subject: [PATCH 57/59] f2fs: use spin_{,un}lock_irq{save,restore} generic/361 reports the warning below. This is because once someone has entered the critical region of sbi.cp_lock, if write_end_io -> f2fs_stop_checkpoint is invoked from a triggered IRQ, we will encounter a deadlock. So this patch changes to use spin_{,un}lock_irq{save,restore} to create the critical region with IRQs disabled to avoid the potential deadlock. irq event stamp: 83391573 loop: Write error at byte offset 438729728, length 1024. hardirqs last enabled at (83391573): [] restore_all+0xf/0x65 hardirqs last disabled at (83391572): [] reschedule_interrupt+0x30/0x3c loop: Write error at byte offset 438860288, length 1536. softirqs last enabled at (83389244): [] __do_softirq+0x1ae/0x476 softirqs last disabled at (83389237): [] do_softirq_own_stack+0x2c/0x40 loop: Write error at byte offset 438990848, length 2048. ================================ WARNING: inconsistent lock state 4.12.0-rc2+ #30 Tainted: G O -------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
xfs_io/7959 [HC1[1]:SC0[0]:HE0:SE1] takes: (&(&sbi->cp_lock)->rlock){?.+...}, at: [] f2fs_stop_checkpoint+0x1c/0x50 [f2fs] {HARDIRQ-ON-W} state was registered at: __lock_acquire+0x527/0x7b0 lock_acquire+0xae/0x220 _raw_spin_lock+0x42/0x50 do_checkpoint+0x165/0x9e0 [f2fs] write_checkpoint+0x33f/0x740 [f2fs] __f2fs_sync_fs+0x92/0x1f0 [f2fs] f2fs_sync_fs+0x12/0x20 [f2fs] sync_filesystem+0x67/0x80 generic_shutdown_super+0x27/0x100 kill_block_super+0x22/0x50 kill_f2fs_super+0x3a/0x40 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x69/0x80 exit_to_usermode_loop+0x57/0x85 do_fast_syscall_32+0x18c/0x1b0 entry_SYSENTER_32+0x4c/0x7b irq event stamp: 1957420 hardirqs last enabled at (1957419): [] _raw_spin_unlock_irq+0x27/0x50 hardirqs last disabled at (1957420): [] call_function_single_interrupt+0x30/0x3c softirqs last enabled at (1953784): [] __do_softirq+0x1ae/0x476 softirqs last disabled at (1953773): [] do_softirq_own_stack+0x2c/0x40 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&sbi->cp_lock)->rlock); lock(&(&sbi->cp_lock)->rlock); *** DEADLOCK *** 2 locks held by xfs_io/7959: #0: (sb_writers#13){.+.+.+}, at: [] vfs_write+0x16a/0x190 #1: (&sb->s_type->i_mutex_key#16){+.+.+.}, at: [] f2fs_file_write_iter+0x25/0x140 [f2fs] stack backtrace: CPU: 2 PID: 7959 Comm: xfs_io Tainted: G O 4.12.0-rc2+ #30 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 Call Trace: dump_stack+0x5f/0x92 print_usage_bug+0x1d3/0x1dd ? check_usage_backwards+0xe0/0xe0 mark_lock+0x23d/0x280 __lock_acquire+0x699/0x7b0 ? __this_cpu_preempt_check+0xf/0x20 ? trace_hardirqs_off_caller+0x91/0xe0 lock_acquire+0xae/0x220 ? f2fs_stop_checkpoint+0x1c/0x50 [f2fs] _raw_spin_lock+0x42/0x50 ? 
f2fs_stop_checkpoint+0x1c/0x50 [f2fs] f2fs_stop_checkpoint+0x1c/0x50 [f2fs] f2fs_write_end_io+0x147/0x150 [f2fs] bio_endio+0x7a/0x1e0 blk_update_request+0xad/0x410 blk_mq_end_request+0x16/0x60 lo_complete_rq+0x3c/0x70 __blk_mq_complete_request_remote+0x11/0x20 flush_smp_call_function_queue+0x6d/0x120 ? debug_smp_processor_id+0x12/0x20 generic_smp_call_function_single_interrupt+0x12/0x30 smp_call_function_single_interrupt+0x25/0x40 call_function_single_interrupt+0x37/0x3c EIP: _raw_spin_unlock_irq+0x2d/0x50 EFLAGS: 00000296 CPU: 2 EAX: 00000001 EBX: d2ccc51c ECX: 00000001 EDX: c1aacebd ESI: 00000000 EDI: 00000000 EBP: c96c9d1c ESP: c96c9d18 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 ? inherit_task_group.isra.98.part.99+0x6b/0xb0 __add_to_page_cache_locked+0x1d4/0x290 add_to_page_cache_lru+0x38/0xb0 pagecache_get_page+0x8e/0x200 f2fs_write_begin+0x96/0xf00 [f2fs] ? trace_hardirqs_on_caller+0xdd/0x1c0 ? current_time+0x17/0x50 ? trace_hardirqs_on+0xb/0x10 generic_perform_write+0xa9/0x170 __generic_file_write_iter+0x1a2/0x1f0 ? f2fs_preallocate_blocks+0x137/0x160 [f2fs] f2fs_file_write_iter+0x6e/0x140 [f2fs] ? 
__lock_acquire+0x429/0x7b0 __vfs_write+0xc1/0x140 vfs_write+0x9b/0x190 SyS_pwrite64+0x63/0xa0 do_fast_syscall_32+0xa1/0x1b0 entry_SYSENTER_32+0x4c/0x7b EIP: 0xb7786c61 EFLAGS: 00000293 CPU: 2 EAX: ffffffda EBX: 00000003 ECX: 08416000 EDX: 00001000 ESI: 18b24000 EDI: 00000000 EBP: 00000003 ESP: bf9b36b0 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b Fixes: aaec2b1d1879 ("f2fs: introduce cp_lock to protect updating of ckpt_flags") Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 11 ++++++----- fs/f2fs/f2fs.h | 18 ++++++++++++------ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 954917d582f8..56bbf592e487 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1053,8 +1053,9 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) { unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long flags; - spin_lock(&sbi->cp_lock); + spin_lock_irqsave(&sbi->cp_lock, flags); if ((cpc->reason & CP_UMOUNT) && le32_to_cpu(ckpt->cp_pack_total_block_count) > @@ -1085,14 +1086,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags; block_t start_blk; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; @@ -1134,12 +1135,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = 
npages_for_summary_flush(sbi, false); - spin_lock(&sbi->cp_lock); + spin_lock_irqsave(&sbi->cp_lock, flags); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7bd0a45dd081..ced78035a416 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1254,9 +1254,11 @@ static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) { - spin_lock(&sbi->cp_lock); + unsigned long flags; + + spin_lock_irqsave(&sbi->cp_lock, flags); __set_ckpt_flags(F2FS_CKPT(sbi), f); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) @@ -1270,22 +1272,26 @@ static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) { - spin_lock(&sbi->cp_lock); + unsigned long flags; + + spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), f); - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { + unsigned long flags; + set_sbi_flag(sbi, SBI_NEED_FSCK); if (lock) - spin_lock(&sbi->cp_lock); + spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); kfree(NM_I(sbi)->nat_bits); NM_I(sbi)->nat_bits = NULL; if (lock) - spin_unlock(&sbi->cp_lock); + spin_unlock_irqrestore(&sbi->cp_lock, flags); } static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, From d29460e5cfc9bc2241886f9f60d0650ad745cf10 Mon Sep 17 00:00:00 2001 From: Jaegeuk 
Kim Date: Wed, 21 Jun 2017 17:52:39 -0700 Subject: [PATCH 58/59] f2fs: avoid deadlock caused by lock order of page and lock_op - punch_hole - fill_zero - f2fs_lock_op - get_new_data_page - lock_page - f2fs_write_data_pages - lock_page - do_write_data_page - f2fs_lock_op Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 72fc866cad19..7dd5fb647d43 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1404,8 +1404,9 @@ int do_write_data_page(struct f2fs_io_info *fio) } } - if (fio->need_lock == LOCK_REQ) - f2fs_lock_op(fio->sbi); + /* Deadlock due to between page->lock and f2fs_lock_op */ + if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) + return -EAGAIN; err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) @@ -1667,7 +1668,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, } done_index = page->index; - +retry_write: lock_page(page); if (unlikely(page->mapping != mapping)) { @@ -1703,6 +1704,15 @@ static int f2fs_write_cache_pages(struct address_space *mapping, unlock_page(page); ret = 0; continue; + } else if (ret == -EAGAIN) { + ret = 0; + if (wbc->sync_mode == WB_SYNC_ALL) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, + HZ/50); + goto retry_write; + } + continue; } done_index = page->index + 1; done = 1; From 0abd675e97e60d40e61d59532f8118b0e439034e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 9 Jul 2017 00:13:07 +0800 Subject: [PATCH 59/59] f2fs: support plain user/group quota This patch adds support for plain user/group quota. Change Note by Jaegeuk Kim. - Use f2fs page cache for quota files in order to consider garbage collection. As a result, quota files do not tolerate sudden power-cuts, so the user needs to run quotacheck. - setattr() calls dquot_transfer which will transfer inode->i_blocks. We can't reclaim that during f2fs_evict_inode(). 
So, we need to count node blocks as well in order to match i_blocks with dquot's space. Note that Chao wrote a patch to count inode->i_blocks without the inode block. (f2fs: don't count inode block in in-memory inode.i_blocks) - in f2fs_remount, we need to make the filesystem RW prior to dquot_resume. - handle fault_injection case during f2fs_quota_off_umount - TODO: Project quota Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 + fs/f2fs/data.c | 10 +- fs/f2fs/f2fs.h | 92 +++++++--- fs/f2fs/file.c | 34 +++- fs/f2fs/inode.c | 5 + fs/f2fs/namei.c | 66 ++++++- fs/f2fs/node.c | 9 +- fs/f2fs/super.c | 280 +++++++++++++++++++++++++++++ 8 files changed, 456 insertions(+), 42 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 8b04a6359530..273ccb26885e 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -162,6 +162,8 @@ mode=%s Control block allocation mode which supports "adaptive" writes towards main area. io_bits=%u Set the bit size of write IO requests. It should be set with "mode=lfs". +usrquota Enable plain user disk quota accounting. +grpquota Enable plain group disk quota accounting. 
================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7dd5fb647d43..251356859476 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -491,14 +491,15 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + int err; if (!count) return 0; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) - return -ENOSPC; + if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); @@ -749,6 +750,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct node_info ni; pgoff_t fofs; blkcnt_t count = 1; + int err; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; @@ -757,8 +759,8 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (dn->data_blkaddr == NEW_ADDR) goto alloc; - if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) - return -ENOSPC; + if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + return err; alloc: get_node_info(sbi, dn->nid, &ni); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ced78035a416..42c39f0bfd88 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_F2FS_FS_ENCRYPTION #include #else @@ -88,6 +89,8 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_ADAPTIVE 0x00020000 #define F2FS_MOUNT_LFS 0x00040000 +#define F2FS_MOUNT_USRQUOTA 0x00080000 +#define F2FS_MOUNT_GRPQUOTA 0x00100000 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -521,6 +524,12 @@ struct 
f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; + + /* quota space reservation, managed internally by quota code */ + qsize_t i_reserved_quota; +#endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ @@ -1376,17 +1385,23 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } -static inline void f2fs_i_blocks_write(struct inode *, block_t, bool); -static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, +static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); +static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { - blkcnt_t diff; + blkcnt_t diff = 0, release = 0; block_t avail_user_block_count; + int ret; + + ret = dquot_reserve_block(inode, *count); + if (ret) + return ret; #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_BLOCK)) { f2fs_show_injection_info(FAULT_BLOCK); - return false; + release = *count; + goto enospc; } #endif /* @@ -1401,17 +1416,24 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; *count -= diff; + release = diff; sbi->total_valid_block_count = avail_user_block_count; if (!*count) { spin_unlock(&sbi->stat_lock); percpu_counter_sub(&sbi->alloc_valid_block_count, diff); - return false; + goto enospc; } } spin_unlock(&sbi->stat_lock); - f2fs_i_blocks_write(inode, *count, true); - return true; + if (release) + dquot_release_reservation_block(inode, release); + f2fs_i_blocks_write(inode, *count, true, true); + return 0; + +enospc: + dquot_release_reservation_block(inode, release); + 
return -ENOSPC; } static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, @@ -1425,7 +1447,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); - f2fs_i_blocks_write(inode, count, false); + f2fs_i_blocks_write(inode, count, false, true); } static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1554,11 +1576,18 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); } -static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, +static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { block_t valid_block_count; unsigned int valid_node_count; + bool quota = inode && !is_inode; + + if (quota) { + int ret = dquot_reserve_block(inode, 1); + if (ret) + return ret; + } spin_lock(&sbi->stat_lock); @@ -1566,28 +1595,33 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, if (unlikely(valid_block_count + sbi->reserved_blocks > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); - return false; + goto enospc; } valid_node_count = sbi->total_valid_node_count + 1; if (unlikely(valid_node_count > sbi->total_node_count)) { spin_unlock(&sbi->stat_lock); - return false; - } - - if (inode) { - if (is_inode) - f2fs_mark_inode_dirty_sync(inode, true); - else - f2fs_i_blocks_write(inode, 1, true); + goto enospc; } sbi->total_valid_node_count++; sbi->total_valid_block_count++; spin_unlock(&sbi->stat_lock); + if (inode) { + if (is_inode) + f2fs_mark_inode_dirty_sync(inode, true); + else + f2fs_i_blocks_write(inode, 1, true, true); + } + percpu_counter_inc(&sbi->alloc_valid_block_count); - return true; + return 0; + +enospc: + if (quota) + dquot_release_reservation_block(inode, 1); + return -ENOSPC; } static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, @@ -1599,12 
+1633,13 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, !sbi->total_valid_node_count); f2fs_bug_on(sbi, !is_inode && !inode->i_blocks); - if (!is_inode) - f2fs_i_blocks_write(inode, 1, false); sbi->total_valid_node_count--; sbi->total_valid_block_count--; spin_unlock(&sbi->stat_lock); + + if (!is_inode) + f2fs_i_blocks_write(inode, 1, false, true); } static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) @@ -1879,14 +1914,21 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc) } static inline void f2fs_i_blocks_write(struct inode *inode, - block_t diff, bool add) + block_t diff, bool add, bool claim) { bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); - blkcnt_t sectors = diff << F2FS_LOG_SECTORS_PER_BLOCK; - inode->i_blocks = add ? inode->i_blocks + sectors : - inode->i_blocks - sectors; + /* add = 1, claim = 1 should be dquot_reserve_block in pair */ + if (add) { + if (claim) + dquot_claim_block(inode, diff); + else + dquot_alloc_block_nofail(inode, diff); + } else { + dquot_free_block(inode, diff); + } + f2fs_mark_inode_dirty_sync(inode, true); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3f56b27e761b..527c9f36d971 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -442,11 +442,10 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) static int f2fs_file_open(struct inode *inode, struct file *filp) { - int ret = generic_file_open(inode, filp); struct dentry *dir; - if (!ret && f2fs_encrypted_inode(inode)) { - ret = fscrypt_get_encryption_info(inode); + if (f2fs_encrypted_inode(inode)) { + int ret = fscrypt_get_encryption_info(inode); if (ret) return -EACCES; if (!fscrypt_has_encryption_key(inode)) @@ -459,7 +458,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) return -EPERM; } dput(dir); - return ret; + return 
dquot_file_open(inode, filp); } int truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -710,6 +709,20 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; + if (is_quota_modification(inode, attr)) { + err = dquot_initialize(inode); + if (err) + return err; + } + if ((attr->ia_valid & ATTR_UID && + !uid_eq(attr->ia_uid, inode->i_uid)) || + (attr->ia_valid & ATTR_GID && + !gid_eq(attr->ia_gid, inode->i_gid))) { + err = dquot_transfer(inode, attr); + if (err) + return err; + } + if (attr->ia_valid & ATTR_SIZE) { if (f2fs_encrypted_inode(inode)) { err = fscrypt_get_encryption_info(inode); @@ -996,9 +1009,9 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, if (do_replace[i]) { f2fs_i_blocks_write(src_inode, - 1, false); + 1, false, false); f2fs_i_blocks_write(dst_inode, - 1, true); + 1, true, false); f2fs_replace_block(sbi, &dn, dn.data_blkaddr, blkaddr[i], ni.version, true, false); @@ -1523,6 +1536,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) inode_lock(inode); + /* Is it quota file? 
Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + inode_unlock(inode); + ret = -EPERM; + goto unlock_out; + } + flags = f2fs_mask_flags(inode->i_mode, flags); oldflags = fi->i_flags; @@ -1542,7 +1562,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); f2fs_mark_inode_dirty_sync(inode, false); - +unlock_out: inode_unlock(inode); out: mnt_drop_write_file(filp); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e42a7a8805dc..6cd312a17c69 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -373,6 +373,8 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; + dquot_initialize(inode); + remove_ino_entry(sbi, inode->i_ino, APPEND_INO); remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); @@ -405,8 +407,11 @@ void f2fs_evict_inode(struct inode *inode) if (err) update_inode_page(inode); + dquot_free_inode(inode); sb_end_intwrite(inode->i_sb); no_delete: + dquot_drop(inode); + stat_dec_inline_xattr(inode); stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b75dc2f4ad57..760d85223c81 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -42,6 +43,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) } f2fs_unlock_op(sbi); + nid_free = true; + inode_init_owner(inode, dir, mode); inode->i_ino = ino; @@ -52,10 +55,17 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) err = insert_inode_locked(inode); if (err) { err = -EINVAL; - nid_free = true; goto fail; } + err = dquot_initialize(inode); + if (err) + goto fail_drop; + + err = dquot_alloc_inode(inode); + if (err) + goto fail_drop; + /* If the directory encrypted, then we should encrypt the inode. 
*/ if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); @@ -85,6 +95,16 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_FREE_NID); iput(inode); return ERR_PTR(err); +fail_drop: + trace_f2fs_new_inode(inode, err); + dquot_drop(inode); + inode->i_flags |= S_NOQUOTA; + if (nid_free) + set_inode_flag(inode, FI_FREE_NID); + clear_nlink(inode); + unlock_new_inode(inode); + iput(inode); + return ERR_PTR(err); } static int is_multimedia_file(const unsigned char *s, const char *sub) @@ -136,6 +156,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -180,6 +204,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, !fscrypt_has_permitted_context(dir, inode)) return -EPERM; + err = dquot_initialize(dir); + if (err) + return err; + f2fs_balance_fs(sbi, true); inode->i_ctime = current_time(inode); @@ -347,6 +375,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); + err = dquot_initialize(dir); + if (err) + return err; + de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { if (IS_ERR(page)) @@ -413,6 +445,10 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (disk_link.len > dir->i_sb->s_blocksize) return -ENAMETOOLONG; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -500,6 +536,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -548,6 +588,10 @@ static int f2fs_mknod(struct inode *dir, 
struct dentry *dentry, struct inode *inode; int err = 0; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -583,6 +627,10 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; + err = dquot_initialize(dir); + if (err) + return err; + inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -676,6 +724,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + err = dquot_initialize(old_dir); + if (err) + goto out; + + err = dquot_initialize(new_dir); + if (err) + goto out; + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -856,6 +912,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, !fscrypt_has_permitted_context(old_dir, new_inode))) return -EPERM; + err = dquot_initialize(old_dir); + if (err) + goto out; + + err = dquot_initialize(new_dir); + if (err) + goto out; + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b9f14ba6441f..3ed2f947f5da 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1040,10 +1040,9 @@ struct page *new_node_page(struct dnode_of_data *dn, if (!page) return ERR_PTR(-ENOMEM); - if (unlikely(!inc_valid_node_count(sbi, dn->inode, !ofs))) { - err = -ENOSPC; + if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs)))) goto fail; - } + #ifdef CONFIG_F2FS_CHECK_FS get_node_info(sbi, dn->nid, &new_ni); f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); @@ -2210,7 +2209,7 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) /* 2: update xattr nid in inode */ remove_free_nid(sbi, new_xnid); f2fs_i_xnid_write(inode, new_xnid); - if (unlikely(!inc_valid_node_count(sbi, inode, false))) + if 
(unlikely(inc_valid_node_count(sbi, inode, false))) f2fs_bug_on(sbi, 1); update_inode_page(inode); @@ -2268,7 +2267,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) new_ni = old_ni; new_ni.ino = ino; - if (unlikely(!inc_valid_node_count(sbi, NULL, true))) + if (unlikely(inc_valid_node_count(sbi, NULL, true))) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index af472f7968d0..dd92170e0d6d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,8 @@ enum { Opt_fault_injection, Opt_lazytime, Opt_nolazytime, + Opt_usrquota, + Opt_grpquota, Opt_err, }; @@ -141,6 +144,8 @@ static match_table_t f2fs_tokens = { {Opt_fault_injection, "fault_injection=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, + {Opt_usrquota, "usrquota"}, + {Opt_grpquota, "grpquota"}, {Opt_err, NULL}, }; @@ -380,6 +385,20 @@ static int parse_options(struct super_block *sb, char *options) case Opt_nolazytime: sb->s_flags &= ~MS_LAZYTIME; break; +#ifdef CONFIG_QUOTA + case Opt_usrquota: + set_opt(sbi, USRQUOTA); + break; + case Opt_grpquota: + set_opt(sbi, GRPQUOTA); + break; +#else + case Opt_usrquota: + case Opt_grpquota: + f2fs_msg(sb, KERN_INFO, + "quota operations not supported"); + break; +#endif default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -421,6 +440,10 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_rwsem(&fi->dio_rwsem[WRITE]); init_rwsem(&fi->i_mmap_sem); +#ifdef CONFIG_QUOTA + memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); + fi->i_reserved_quota = 0; +#endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; return &fi->vfs_inode; @@ -561,11 +584,14 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) kfree(sbi->devs); } +static void f2fs_quota_off_umount(struct super_block *sb); static 
void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); int i; + f2fs_quota_off_umount(sb); + /* prevent remaining shrinker jobs */ mutex_lock(&sbi->umount_mutex); @@ -782,6 +808,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",fault_injection=%u", sbi->fault_info.inject_rate); #endif +#ifdef CONFIG_QUOTA + if (test_opt(sbi, USRQUOTA)) + seq_puts(seq, ",usrquota"); + if (test_opt(sbi, GRPQUOTA)) + seq_puts(seq, ",grpquota"); +#endif return 0; } @@ -822,6 +854,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; + unsigned long old_sb_flags; int err, active_logs; bool need_restart_gc = false; bool need_stop_gc = false; @@ -835,6 +868,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * need to restore them. */ org_mount_opt = sbi->mount_opt; + old_sb_flags = sb->s_flags; active_logs = sbi->active_logs; /* recover superblocks we couldn't write due to previous RO mount */ @@ -860,6 +894,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) goto skip; + if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { + err = dquot_suspend(sb, -1); + if (err < 0) + goto restore_opts; + } else { + /* dquot_resume needs RW */ + sb->s_flags &= ~MS_RDONLY; + dquot_resume(sb, -1); + } + /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; @@ -924,12 +968,237 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) restore_opts: sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; + sb->s_flags = old_sb_flags; #ifdef CONFIG_F2FS_FAULT_INJECTION sbi->fault_info = ffi; #endif return err; } +#ifdef CONFIG_QUOTA +/* Read data from quotafile */ +static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, + 
size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + struct address_space *mapping = inode->i_mapping; + block_t blkidx = F2FS_BYTES_TO_BLK(off); + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + loff_t i_size = i_size_read(inode); + struct page *page; + char *kaddr; + + if (off > i_size) + return 0; + + if (off + len > i_size) + len = i_size - off; + toread = len; + while (toread > 0) { + tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); +repeat: + page = read_mapping_page(mapping, blkidx, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + f2fs_put_page(page, 1); + goto repeat; + } + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return -EIO; + } + + kaddr = kmap_atomic(page); + memcpy(data, kaddr + offset, tocopy); + kunmap_atomic(kaddr); + f2fs_put_page(page, 1); + + offset = 0; + toread -= tocopy; + data += tocopy; + blkidx++; + } + return len; +} + +/* Write to quotafile */ +static ssize_t f2fs_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + struct address_space *mapping = inode->i_mapping; + const struct address_space_operations *a_ops = mapping->a_ops; + int offset = off & (sb->s_blocksize - 1); + size_t towrite = len; + struct page *page; + char *kaddr; + int err = 0; + int tocopy; + + while (towrite > 0) { + tocopy = min_t(unsigned long, sb->s_blocksize - offset, + towrite); + + err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, + &page, NULL); + if (unlikely(err)) + break; + + kaddr = kmap_atomic(page); + memcpy(kaddr + offset, data, tocopy); + kunmap_atomic(kaddr); + flush_dcache_page(page); + + a_ops->write_end(NULL, mapping, off, tocopy, tocopy, + page, NULL); + offset = 0; + towrite -= tocopy; + off += tocopy; + data += tocopy; + cond_resched(); + } + + if (len == towrite) + return err; + 
inode->i_version++; + inode->i_mtime = inode->i_ctime = current_time(inode); + f2fs_mark_inode_dirty_sync(inode, false); + return len - towrite; +} + +static struct dquot **f2fs_get_dquots(struct inode *inode) +{ + return F2FS_I(inode)->i_dquot; +} + +static qsize_t *f2fs_get_reserved_space(struct inode *inode) +{ + return &F2FS_I(inode)->i_reserved_quota; +} + +static int f2fs_quota_sync(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int cnt; + int ret; + + ret = dquot_writeback_dquots(sb, type); + if (ret) + return ret; + + /* + * Now when everything is written we can discard the pagecache so + * that userspace sees the changes. + */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && cnt != type) + continue; + if (!sb_has_quota_active(sb, cnt)) + continue; + + ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping); + if (ret) + return ret; + + inode_lock(dqopt->files[cnt]); + truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); + inode_unlock(dqopt->files[cnt]); + } + return 0; +} + +static int f2fs_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + struct inode *inode; + int err; + + err = f2fs_quota_sync(sb, -1); + if (err) + return err; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + + inode_lock(inode); + F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + f2fs_mark_inode_dirty_sync(inode, false); + + return 0; +} + +static int f2fs_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + return dquot_quota_off(sb, type); + + f2fs_quota_sync(sb, -1); + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL); + 
inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + f2fs_mark_inode_dirty_sync(inode, false); +out_put: + iput(inode); + return err; +} + +static void f2fs_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + f2fs_quota_off(sb, type); +} + +static const struct dquot_operations f2fs_quota_operations = { + .get_reserved_space = f2fs_get_reserved_space, + .write_dquot = dquot_commit, + .acquire_dquot = dquot_acquire, + .release_dquot = dquot_release, + .mark_dirty = dquot_mark_dquot_dirty, + .write_info = dquot_commit_info, + .alloc_dquot = dquot_alloc, + .destroy_dquot = dquot_destroy, + .get_next_id = dquot_get_next_id, +}; + +static const struct quotactl_ops f2fs_quotactl_ops = { + .quota_on = f2fs_quota_on, + .quota_off = f2fs_quota_off, + .quota_sync = f2fs_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; +#else +static inline void f2fs_quota_off_umount(struct super_block *sb) +{ +} +#endif + static struct super_operations f2fs_sops = { .alloc_inode = f2fs_alloc_inode, .drop_inode = f2fs_drop_inode, @@ -937,6 +1206,11 @@ static struct super_operations f2fs_sops = { .write_inode = f2fs_write_inode, .dirty_inode = f2fs_dirty_inode, .show_options = f2fs_show_options, +#ifdef CONFIG_QUOTA + .quota_read = f2fs_quota_read, + .quota_write = f2fs_quota_write, + .get_dquots = f2fs_get_dquots, +#endif .evict_inode = f2fs_evict_inode, .put_super = f2fs_put_super, .sync_fs = f2fs_sync_fs, @@ -1679,6 +1953,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sb->s_max_links = F2FS_LINK_MAX; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); +#ifdef CONFIG_QUOTA + sb->dq_op = &f2fs_quota_operations; + sb->s_qcop = &f2fs_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; +#endif + sb->s_op = &f2fs_sops; sb->s_cop = 
&f2fs_cryptops; sb->s_xattr = f2fs_xattr_handlers;