ceph: properly get capsnap's size in get_oldest_context()
capsnap's size is set by __ceph_finish_cap_snap(). If capsnap is under writing, its size is zero. In this case, get_oldest_context() should read i_size. Besides, ceph_writepages_start() should re-check capsnap's size after dirty pages get locked. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
b072d77466
commit
1f934b00e9
137
fs/ceph/addr.c
137
fs/ceph/addr.c
|
@ -463,14 +463,20 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ceph_writeback_ctl
|
||||||
|
{
|
||||||
|
loff_t i_size;
|
||||||
|
u64 truncate_size;
|
||||||
|
u32 truncate_seq;
|
||||||
|
bool size_stable;
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get ref for the oldest snapc for an inode with dirty data... that is, the
|
* Get ref for the oldest snapc for an inode with dirty data... that is, the
|
||||||
* only snap context we are allowed to write back.
|
* only snap context we are allowed to write back.
|
||||||
*/
|
*/
|
||||||
static struct ceph_snap_context *get_oldest_context(struct inode *inode,
|
static struct ceph_snap_context *
|
||||||
loff_t *snap_size,
|
get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl)
|
||||||
u64 *truncate_size,
|
|
||||||
u32 *truncate_seq)
|
|
||||||
{
|
{
|
||||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||||
struct ceph_snap_context *snapc = NULL;
|
struct ceph_snap_context *snapc = NULL;
|
||||||
|
@ -482,12 +488,17 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
|
||||||
capsnap->context, capsnap->dirty_pages);
|
capsnap->context, capsnap->dirty_pages);
|
||||||
if (capsnap->dirty_pages) {
|
if (capsnap->dirty_pages) {
|
||||||
snapc = ceph_get_snap_context(capsnap->context);
|
snapc = ceph_get_snap_context(capsnap->context);
|
||||||
if (snap_size)
|
if (ctl) {
|
||||||
*snap_size = capsnap->size;
|
if (capsnap->writing) {
|
||||||
if (truncate_size)
|
ctl->i_size = i_size_read(inode);
|
||||||
*truncate_size = capsnap->truncate_size;
|
ctl->size_stable = false;
|
||||||
if (truncate_seq)
|
} else {
|
||||||
*truncate_seq = capsnap->truncate_seq;
|
ctl->i_size = capsnap->size;
|
||||||
|
ctl->size_stable = true;
|
||||||
|
}
|
||||||
|
ctl->truncate_size = capsnap->truncate_size;
|
||||||
|
ctl->truncate_seq = capsnap->truncate_seq;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -495,15 +506,44 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
|
||||||
snapc = ceph_get_snap_context(ci->i_head_snapc);
|
snapc = ceph_get_snap_context(ci->i_head_snapc);
|
||||||
dout(" head snapc %p has %d dirty pages\n",
|
dout(" head snapc %p has %d dirty pages\n",
|
||||||
snapc, ci->i_wrbuffer_ref_head);
|
snapc, ci->i_wrbuffer_ref_head);
|
||||||
if (truncate_size)
|
if (ctl) {
|
||||||
*truncate_size = ci->i_truncate_size;
|
ctl->i_size = i_size_read(inode);
|
||||||
if (truncate_seq)
|
ctl->truncate_size = ci->i_truncate_size;
|
||||||
*truncate_seq = ci->i_truncate_seq;
|
ctl->truncate_seq = ci->i_truncate_seq;
|
||||||
|
ctl->size_stable = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
spin_unlock(&ci->i_ceph_lock);
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
return snapc;
|
return snapc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static u64 get_writepages_data_length(struct inode *inode,
|
||||||
|
struct page *page, u64 start)
|
||||||
|
{
|
||||||
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||||
|
struct ceph_snap_context *snapc = page_snap_context(page);
|
||||||
|
struct ceph_cap_snap *capsnap = NULL;
|
||||||
|
u64 end = i_size_read(inode);
|
||||||
|
|
||||||
|
if (snapc != ci->i_head_snapc) {
|
||||||
|
bool found = false;
|
||||||
|
spin_lock(&ci->i_ceph_lock);
|
||||||
|
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
|
||||||
|
if (capsnap->context == snapc) {
|
||||||
|
if (!capsnap->writing)
|
||||||
|
end = capsnap->size;
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spin_unlock(&ci->i_ceph_lock);
|
||||||
|
WARN_ON(!found);
|
||||||
|
}
|
||||||
|
if (end > page_offset(page) + PAGE_SIZE)
|
||||||
|
end = page_offset(page) + PAGE_SIZE;
|
||||||
|
return end > start ? end - start : 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Write a single page, but leave the page locked.
|
* Write a single page, but leave the page locked.
|
||||||
*
|
*
|
||||||
|
@ -515,21 +555,17 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
struct ceph_inode_info *ci;
|
struct ceph_inode_info *ci;
|
||||||
struct ceph_fs_client *fsc;
|
struct ceph_fs_client *fsc;
|
||||||
struct ceph_osd_client *osdc;
|
|
||||||
struct ceph_snap_context *snapc, *oldest;
|
struct ceph_snap_context *snapc, *oldest;
|
||||||
loff_t page_off = page_offset(page);
|
loff_t page_off = page_offset(page);
|
||||||
loff_t snap_size = -1;
|
|
||||||
long writeback_stat;
|
long writeback_stat;
|
||||||
u64 truncate_size;
|
|
||||||
u32 truncate_seq;
|
|
||||||
int err, len = PAGE_SIZE;
|
int err, len = PAGE_SIZE;
|
||||||
|
struct ceph_writeback_ctl ceph_wbc;
|
||||||
|
|
||||||
dout("writepage %p idx %lu\n", page, page->index);
|
dout("writepage %p idx %lu\n", page, page->index);
|
||||||
|
|
||||||
inode = page->mapping->host;
|
inode = page->mapping->host;
|
||||||
ci = ceph_inode(inode);
|
ci = ceph_inode(inode);
|
||||||
fsc = ceph_inode_to_client(inode);
|
fsc = ceph_inode_to_client(inode);
|
||||||
osdc = &fsc->client->osdc;
|
|
||||||
|
|
||||||
/* verify this is a writeable snap context */
|
/* verify this is a writeable snap context */
|
||||||
snapc = page_snap_context(page);
|
snapc = page_snap_context(page);
|
||||||
|
@ -537,8 +573,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
|
||||||
dout("writepage %p page %p not dirty?\n", inode, page);
|
dout("writepage %p page %p not dirty?\n", inode, page);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
oldest = get_oldest_context(inode, &snap_size,
|
oldest = get_oldest_context(inode, &ceph_wbc);
|
||||||
&truncate_size, &truncate_seq);
|
|
||||||
if (snapc->seq > oldest->seq) {
|
if (snapc->seq > oldest->seq) {
|
||||||
dout("writepage %p page %p snapc %p not writeable - noop\n",
|
dout("writepage %p page %p snapc %p not writeable - noop\n",
|
||||||
inode, page, snapc);
|
inode, page, snapc);
|
||||||
|
@ -550,17 +585,14 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
|
||||||
}
|
}
|
||||||
ceph_put_snap_context(oldest);
|
ceph_put_snap_context(oldest);
|
||||||
|
|
||||||
if (snap_size == -1)
|
|
||||||
snap_size = i_size_read(inode);
|
|
||||||
|
|
||||||
/* is this a partial page at end of file? */
|
/* is this a partial page at end of file? */
|
||||||
if (page_off >= snap_size) {
|
if (page_off >= ceph_wbc.i_size) {
|
||||||
dout("%p page eof %llu\n", page, snap_size);
|
dout("%p page eof %llu\n", page, ceph_wbc.i_size);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (snap_size < page_off + len)
|
if (ceph_wbc.i_size < page_off + len)
|
||||||
len = snap_size - page_off;
|
len = ceph_wbc.i_size - page_off;
|
||||||
|
|
||||||
dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n",
|
dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n",
|
||||||
inode, page, page->index, page_off, len, snapc, snapc->seq);
|
inode, page, page->index, page_off, len, snapc, snapc->seq);
|
||||||
|
@ -571,10 +603,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
|
||||||
set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
|
set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
|
||||||
|
|
||||||
set_page_writeback(page);
|
set_page_writeback(page);
|
||||||
err = ceph_osdc_writepages(osdc, ceph_vino(inode),
|
err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
|
||||||
&ci->i_layout, snapc,
|
&ci->i_layout, snapc, page_off, len,
|
||||||
page_off, len,
|
ceph_wbc.truncate_seq,
|
||||||
truncate_seq, truncate_size,
|
ceph_wbc.truncate_size,
|
||||||
&inode->i_mtime, &page, 1);
|
&inode->i_mtime, &page, 1);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
struct writeback_control tmp_wbc;
|
struct writeback_control tmp_wbc;
|
||||||
|
@ -745,9 +777,7 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
unsigned int wsize = i_blocksize(inode);
|
unsigned int wsize = i_blocksize(inode);
|
||||||
struct ceph_osd_request *req = NULL;
|
struct ceph_osd_request *req = NULL;
|
||||||
loff_t snap_size, i_size;
|
struct ceph_writeback_ctl ceph_wbc;
|
||||||
u64 truncate_size;
|
|
||||||
u32 truncate_seq;
|
|
||||||
|
|
||||||
dout("writepages_start %p (mode=%s)\n", inode,
|
dout("writepages_start %p (mode=%s)\n", inode,
|
||||||
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
|
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
|
||||||
|
@ -786,9 +816,7 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
retry:
|
retry:
|
||||||
/* find oldest snap context with dirty data */
|
/* find oldest snap context with dirty data */
|
||||||
ceph_put_snap_context(snapc);
|
ceph_put_snap_context(snapc);
|
||||||
snap_size = -1;
|
snapc = get_oldest_context(inode, &ceph_wbc);
|
||||||
snapc = get_oldest_context(inode, &snap_size,
|
|
||||||
&truncate_size, &truncate_seq);
|
|
||||||
if (!snapc) {
|
if (!snapc) {
|
||||||
/* hmm, why does writepages get called when there
|
/* hmm, why does writepages get called when there
|
||||||
is no dirty data? */
|
is no dirty data? */
|
||||||
|
@ -798,8 +826,6 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
dout(" oldest snapc is %p seq %lld (%d snaps)\n",
|
dout(" oldest snapc is %p seq %lld (%d snaps)\n",
|
||||||
snapc, snapc->seq, snapc->num_snaps);
|
snapc, snapc->seq, snapc->num_snaps);
|
||||||
|
|
||||||
i_size = i_size_read(inode);
|
|
||||||
|
|
||||||
if (last_snapc && snapc != last_snapc) {
|
if (last_snapc && snapc != last_snapc) {
|
||||||
/* if we switched to a newer snapc, restart our scan at the
|
/* if we switched to a newer snapc, restart our scan at the
|
||||||
* start of the original file range. */
|
* start of the original file range. */
|
||||||
|
@ -865,10 +891,9 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
dout("waiting on writeback %p\n", page);
|
dout("waiting on writeback %p\n", page);
|
||||||
wait_on_page_writeback(page);
|
wait_on_page_writeback(page);
|
||||||
}
|
}
|
||||||
if (page_offset(page) >=
|
if (page_offset(page) >= ceph_wbc.i_size) {
|
||||||
(snap_size == -1 ? i_size : snap_size)) {
|
dout("%p page eof %llu\n",
|
||||||
dout("%p page eof %llu\n", page,
|
page, ceph_wbc.i_size);
|
||||||
(snap_size == -1 ? i_size : snap_size));
|
|
||||||
done = 1;
|
done = 1;
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
break;
|
break;
|
||||||
|
@ -996,10 +1021,9 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
req = ceph_osdc_new_request(&fsc->client->osdc,
|
req = ceph_osdc_new_request(&fsc->client->osdc,
|
||||||
&ci->i_layout, vino,
|
&ci->i_layout, vino,
|
||||||
offset, &len, 0, num_ops,
|
offset, &len, 0, num_ops,
|
||||||
CEPH_OSD_OP_WRITE,
|
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
|
||||||
CEPH_OSD_FLAG_WRITE,
|
snapc, ceph_wbc.truncate_seq,
|
||||||
snapc, truncate_seq,
|
ceph_wbc.truncate_size, false);
|
||||||
truncate_size, false);
|
|
||||||
if (IS_ERR(req)) {
|
if (IS_ERR(req)) {
|
||||||
req = ceph_osdc_new_request(&fsc->client->osdc,
|
req = ceph_osdc_new_request(&fsc->client->osdc,
|
||||||
&ci->i_layout, vino,
|
&ci->i_layout, vino,
|
||||||
|
@ -1008,8 +1032,8 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
CEPH_OSD_SLAB_OPS),
|
CEPH_OSD_SLAB_OPS),
|
||||||
CEPH_OSD_OP_WRITE,
|
CEPH_OSD_OP_WRITE,
|
||||||
CEPH_OSD_FLAG_WRITE,
|
CEPH_OSD_FLAG_WRITE,
|
||||||
snapc, truncate_seq,
|
snapc, ceph_wbc.truncate_seq,
|
||||||
truncate_size, true);
|
ceph_wbc.truncate_size, true);
|
||||||
BUG_ON(IS_ERR(req));
|
BUG_ON(IS_ERR(req));
|
||||||
}
|
}
|
||||||
BUG_ON(len < page_offset(pages[locked_pages - 1]) +
|
BUG_ON(len < page_offset(pages[locked_pages - 1]) +
|
||||||
|
@ -1046,14 +1070,15 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
len += PAGE_SIZE;
|
len += PAGE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (snap_size != -1) {
|
if (ceph_wbc.size_stable) {
|
||||||
len = min(len, snap_size - offset);
|
len = min(len, ceph_wbc.i_size - offset);
|
||||||
} else if (i == locked_pages) {
|
} else if (i == locked_pages) {
|
||||||
/* writepages_finish() clears writeback pages
|
/* writepages_finish() clears writeback pages
|
||||||
* according to the data length, so make sure
|
* according to the data length, so make sure
|
||||||
* data length covers all locked pages */
|
* data length covers all locked pages */
|
||||||
u64 min_len = len + 1 - PAGE_SIZE;
|
u64 min_len = len + 1 - PAGE_SIZE;
|
||||||
len = min(len, (u64)i_size_read(inode) - offset);
|
len = get_writepages_data_length(inode, pages[i - 1],
|
||||||
|
offset);
|
||||||
len = max(len, min_len);
|
len = max(len, min_len);
|
||||||
}
|
}
|
||||||
dout("writepages got pages at %llu~%llu\n", offset, len);
|
dout("writepages got pages at %llu~%llu\n", offset, len);
|
||||||
|
@ -1137,8 +1162,7 @@ static int ceph_writepages_start(struct address_space *mapping,
|
||||||
static int context_is_writeable_or_written(struct inode *inode,
|
static int context_is_writeable_or_written(struct inode *inode,
|
||||||
struct ceph_snap_context *snapc)
|
struct ceph_snap_context *snapc)
|
||||||
{
|
{
|
||||||
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL,
|
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
|
||||||
NULL, NULL);
|
|
||||||
int ret = !oldest || snapc->seq <= oldest->seq;
|
int ret = !oldest || snapc->seq <= oldest->seq;
|
||||||
|
|
||||||
ceph_put_snap_context(oldest);
|
ceph_put_snap_context(oldest);
|
||||||
|
@ -1183,8 +1207,7 @@ static int ceph_update_writeable_page(struct file *file,
|
||||||
* this page is already dirty in another (older) snap
|
* this page is already dirty in another (older) snap
|
||||||
* context! is it writeable now?
|
* context! is it writeable now?
|
||||||
*/
|
*/
|
||||||
oldest = get_oldest_context(inode, NULL, NULL, NULL);
|
oldest = get_oldest_context(inode, NULL);
|
||||||
|
|
||||||
if (snapc->seq > oldest->seq) {
|
if (snapc->seq > oldest->seq) {
|
||||||
ceph_put_snap_context(oldest);
|
ceph_put_snap_context(oldest);
|
||||||
dout(" page %p snapc %p not current or oldest\n",
|
dout(" page %p snapc %p not current or oldest\n",
|
||||||
|
|
Loading…
Reference in New Issue