mirror of https://gitee.com/openkylin/linux.git
Merge branch 'for-linus-merged' of git://oss.sgi.com/xfs/xfs
* 'for-linus-merged' of git://oss.sgi.com/xfs/xfs: (47 commits) xfs: convert grant head manipulations to lockless algorithm xfs: introduce new locks for the log grant ticket wait queues xfs: convert log grant heads to atomic variables xfs: convert l_tail_lsn to an atomic variable. xfs: convert l_last_sync_lsn to an atomic variable xfs: make AIL tail pushing independent of the grant lock xfs: use wait queues directly for the log wait queues xfs: combine grant heads into a single 64 bit integer xfs: rework log grant space calculations xfs: fact out common grant head/log tail verification code xfs: convert log grant ticket queues to list heads xfs: use AIL bulk delete function to implement single delete xfs: use AIL bulk update function to implement single updates xfs: remove all the inodes on a buffer from the AIL in bulk xfs: consume iodone callback items on buffers as they are processed xfs: reduce the number of AIL push wakeups xfs: bulk AIL insertion during transaction commit xfs: clean up xfs_ail_delete() xfs: Pull EFI/EFD handling out from under the AIL lock xfs: fix EFI transaction cancellation. ...
This commit is contained in:
commit
7bc4a4ce68
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#ifndef __XFS_SUPPORT_SV_H__
|
||||
#define __XFS_SUPPORT_SV_H__
|
||||
|
||||
#include <linux/wait.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
/*
|
||||
* Synchronisation variables.
|
||||
*
|
||||
* (Parameters "pri", "svf" and "rts" are not implemented)
|
||||
*/
|
||||
|
||||
typedef struct sv_s {
|
||||
wait_queue_head_t waiters;
|
||||
} sv_t;
|
||||
|
||||
static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
|
||||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
add_wait_queue_exclusive(&sv->waiters, &wait);
|
||||
__set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
spin_unlock(lock);
|
||||
|
||||
schedule();
|
||||
|
||||
remove_wait_queue(&sv->waiters, &wait);
|
||||
}
|
||||
|
||||
#define sv_init(sv,flag,name) \
|
||||
init_waitqueue_head(&(sv)->waiters)
|
||||
#define sv_destroy(sv) \
|
||||
/*NOTHING*/
|
||||
#define sv_wait(sv, pri, lock, s) \
|
||||
_sv_wait(sv, lock)
|
||||
#define sv_signal(sv) \
|
||||
wake_up(&(sv)->waiters)
|
||||
#define sv_broadcast(sv) \
|
||||
wake_up_all(&(sv)->waiters)
|
||||
|
||||
#endif /* __XFS_SUPPORT_SV_H__ */
|
|
@ -38,15 +38,6 @@
|
|||
#include <linux/pagevec.h>
|
||||
#include <linux/writeback.h>
|
||||
|
||||
/*
|
||||
* Types of I/O for bmap clustering and I/O completion tracking.
|
||||
*/
|
||||
enum {
|
||||
IO_READ, /* mapping for a read */
|
||||
IO_DELAY, /* mapping covers delalloc region */
|
||||
IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
|
||||
IO_NEW /* just allocated */
|
||||
};
|
||||
|
||||
/*
|
||||
* Prime number of hash buckets since address is used as the key.
|
||||
|
@ -182,9 +173,6 @@ xfs_setfilesize(
|
|||
xfs_inode_t *ip = XFS_I(ioend->io_inode);
|
||||
xfs_fsize_t isize;
|
||||
|
||||
ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
|
||||
ASSERT(ioend->io_type != IO_READ);
|
||||
|
||||
if (unlikely(ioend->io_error))
|
||||
return 0;
|
||||
|
||||
|
@ -244,10 +232,8 @@ xfs_end_io(
|
|||
* We might have to update the on-disk file size after extending
|
||||
* writes.
|
||||
*/
|
||||
if (ioend->io_type != IO_READ) {
|
||||
error = xfs_setfilesize(ioend);
|
||||
ASSERT(!error || error == EAGAIN);
|
||||
}
|
||||
error = xfs_setfilesize(ioend);
|
||||
ASSERT(!error || error == EAGAIN);
|
||||
|
||||
/*
|
||||
* If we didn't complete processing of the ioend, requeue it to the
|
||||
|
@ -318,14 +304,63 @@ STATIC int
|
|||
xfs_map_blocks(
|
||||
struct inode *inode,
|
||||
loff_t offset,
|
||||
ssize_t count,
|
||||
struct xfs_bmbt_irec *imap,
|
||||
int flags)
|
||||
int type,
|
||||
int nonblocking)
|
||||
{
|
||||
int nmaps = 1;
|
||||
int new = 0;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
ssize_t count = 1 << inode->i_blkbits;
|
||||
xfs_fileoff_t offset_fsb, end_fsb;
|
||||
int error = 0;
|
||||
int bmapi_flags = XFS_BMAPI_ENTIRE;
|
||||
int nimaps = 1;
|
||||
|
||||
return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return -XFS_ERROR(EIO);
|
||||
|
||||
if (type == IO_UNWRITTEN)
|
||||
bmapi_flags |= XFS_BMAPI_IGSTATE;
|
||||
|
||||
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
|
||||
if (nonblocking)
|
||||
return -XFS_ERROR(EAGAIN);
|
||||
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
||||
}
|
||||
|
||||
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
||||
(ip->i_df.if_flags & XFS_IFEXTENTS));
|
||||
ASSERT(offset <= mp->m_maxioffset);
|
||||
|
||||
if (offset + count > mp->m_maxioffset)
|
||||
count = mp->m_maxioffset - offset;
|
||||
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
|
||||
offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
||||
error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
|
||||
bmapi_flags, NULL, 0, imap, &nimaps, NULL);
|
||||
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
||||
|
||||
if (error)
|
||||
return -XFS_ERROR(error);
|
||||
|
||||
if (type == IO_DELALLOC &&
|
||||
(!nimaps || isnullstartblock(imap->br_startblock))) {
|
||||
error = xfs_iomap_write_allocate(ip, offset, count, imap);
|
||||
if (!error)
|
||||
trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
|
||||
return -XFS_ERROR(error);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
if (type == IO_UNWRITTEN) {
|
||||
ASSERT(nimaps);
|
||||
ASSERT(imap->br_startblock != HOLESTARTBLOCK);
|
||||
ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
|
||||
}
|
||||
#endif
|
||||
if (nimaps)
|
||||
trace_xfs_map_blocks_found(ip, offset, count, type, imap);
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
|
@ -380,26 +415,18 @@ xfs_submit_ioend_bio(
|
|||
|
||||
submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
|
||||
WRITE_SYNC_PLUG : WRITE, bio);
|
||||
ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
STATIC struct bio *
|
||||
xfs_alloc_ioend_bio(
|
||||
struct buffer_head *bh)
|
||||
{
|
||||
struct bio *bio;
|
||||
int nvecs = bio_get_nr_vecs(bh->b_bdev);
|
||||
|
||||
do {
|
||||
bio = bio_alloc(GFP_NOIO, nvecs);
|
||||
nvecs >>= 1;
|
||||
} while (!bio);
|
||||
struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
|
||||
|
||||
ASSERT(bio->bi_private == NULL);
|
||||
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
|
||||
bio->bi_bdev = bh->b_bdev;
|
||||
bio_get(bio);
|
||||
return bio;
|
||||
}
|
||||
|
||||
|
@ -470,9 +497,8 @@ xfs_submit_ioend(
|
|||
/* Pass 1 - start writeback */
|
||||
do {
|
||||
next = ioend->io_list;
|
||||
for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
|
||||
for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
|
||||
xfs_start_buffer_writeback(bh);
|
||||
}
|
||||
} while ((ioend = next) != NULL);
|
||||
|
||||
/* Pass 2 - submit I/O */
|
||||
|
@ -600,116 +626,12 @@ xfs_map_at_offset(
|
|||
ASSERT(imap->br_startblock != HOLESTARTBLOCK);
|
||||
ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
|
||||
|
||||
lock_buffer(bh);
|
||||
xfs_map_buffer(inode, bh, imap, offset);
|
||||
bh->b_bdev = xfs_find_bdev_for_inode(inode);
|
||||
set_buffer_mapped(bh);
|
||||
clear_buffer_delay(bh);
|
||||
clear_buffer_unwritten(bh);
|
||||
}
|
||||
|
||||
/*
|
||||
* Look for a page at index that is suitable for clustering.
|
||||
*/
|
||||
STATIC unsigned int
|
||||
xfs_probe_page(
|
||||
struct page *page,
|
||||
unsigned int pg_offset)
|
||||
{
|
||||
struct buffer_head *bh, *head;
|
||||
int ret = 0;
|
||||
|
||||
if (PageWriteback(page))
|
||||
return 0;
|
||||
if (!PageDirty(page))
|
||||
return 0;
|
||||
if (!page->mapping)
|
||||
return 0;
|
||||
if (!page_has_buffers(page))
|
||||
return 0;
|
||||
|
||||
bh = head = page_buffers(page);
|
||||
do {
|
||||
if (!buffer_uptodate(bh))
|
||||
break;
|
||||
if (!buffer_mapped(bh))
|
||||
break;
|
||||
ret += bh->b_size;
|
||||
if (ret >= pg_offset)
|
||||
break;
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
STATIC size_t
|
||||
xfs_probe_cluster(
|
||||
struct inode *inode,
|
||||
struct page *startpage,
|
||||
struct buffer_head *bh,
|
||||
struct buffer_head *head)
|
||||
{
|
||||
struct pagevec pvec;
|
||||
pgoff_t tindex, tlast, tloff;
|
||||
size_t total = 0;
|
||||
int done = 0, i;
|
||||
|
||||
/* First sum forwards in this page */
|
||||
do {
|
||||
if (!buffer_uptodate(bh) || !buffer_mapped(bh))
|
||||
return total;
|
||||
total += bh->b_size;
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
|
||||
/* if we reached the end of the page, sum forwards in following pages */
|
||||
tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
|
||||
tindex = startpage->index + 1;
|
||||
|
||||
/* Prune this back to avoid pathological behavior */
|
||||
tloff = min(tlast, startpage->index + 64);
|
||||
|
||||
pagevec_init(&pvec, 0);
|
||||
while (!done && tindex <= tloff) {
|
||||
unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
|
||||
|
||||
if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
|
||||
break;
|
||||
|
||||
for (i = 0; i < pagevec_count(&pvec); i++) {
|
||||
struct page *page = pvec.pages[i];
|
||||
size_t pg_offset, pg_len = 0;
|
||||
|
||||
if (tindex == tlast) {
|
||||
pg_offset =
|
||||
i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
|
||||
if (!pg_offset) {
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
} else
|
||||
pg_offset = PAGE_CACHE_SIZE;
|
||||
|
||||
if (page->index == tindex && trylock_page(page)) {
|
||||
pg_len = xfs_probe_page(page, pg_offset);
|
||||
unlock_page(page);
|
||||
}
|
||||
|
||||
if (!pg_len) {
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
total += pg_len;
|
||||
tindex++;
|
||||
}
|
||||
|
||||
pagevec_release(&pvec);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test if a given page is suitable for writing as part of an unwritten
|
||||
* or delayed allocate extent.
|
||||
|
@ -731,9 +653,9 @@ xfs_is_delayed_page(
|
|||
if (buffer_unwritten(bh))
|
||||
acceptable = (type == IO_UNWRITTEN);
|
||||
else if (buffer_delay(bh))
|
||||
acceptable = (type == IO_DELAY);
|
||||
acceptable = (type == IO_DELALLOC);
|
||||
else if (buffer_dirty(bh) && buffer_mapped(bh))
|
||||
acceptable = (type == IO_NEW);
|
||||
acceptable = (type == IO_OVERWRITE);
|
||||
else
|
||||
break;
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
|
@ -758,8 +680,7 @@ xfs_convert_page(
|
|||
loff_t tindex,
|
||||
struct xfs_bmbt_irec *imap,
|
||||
xfs_ioend_t **ioendp,
|
||||
struct writeback_control *wbc,
|
||||
int all_bh)
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
struct buffer_head *bh, *head;
|
||||
xfs_off_t end_offset;
|
||||
|
@ -814,37 +735,30 @@ xfs_convert_page(
|
|||
continue;
|
||||
}
|
||||
|
||||
if (buffer_unwritten(bh) || buffer_delay(bh)) {
|
||||
if (buffer_unwritten(bh) || buffer_delay(bh) ||
|
||||
buffer_mapped(bh)) {
|
||||
if (buffer_unwritten(bh))
|
||||
type = IO_UNWRITTEN;
|
||||
else if (buffer_delay(bh))
|
||||
type = IO_DELALLOC;
|
||||
else
|
||||
type = IO_DELAY;
|
||||
type = IO_OVERWRITE;
|
||||
|
||||
if (!xfs_imap_valid(inode, imap, offset)) {
|
||||
done = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
ASSERT(imap->br_startblock != HOLESTARTBLOCK);
|
||||
ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
|
||||
|
||||
xfs_map_at_offset(inode, bh, imap, offset);
|
||||
lock_buffer(bh);
|
||||
if (type != IO_OVERWRITE)
|
||||
xfs_map_at_offset(inode, bh, imap, offset);
|
||||
xfs_add_to_ioend(inode, bh, offset, type,
|
||||
ioendp, done);
|
||||
|
||||
page_dirty--;
|
||||
count++;
|
||||
} else {
|
||||
type = IO_NEW;
|
||||
if (buffer_mapped(bh) && all_bh) {
|
||||
lock_buffer(bh);
|
||||
xfs_add_to_ioend(inode, bh, offset,
|
||||
type, ioendp, done);
|
||||
count++;
|
||||
page_dirty--;
|
||||
} else {
|
||||
done = 1;
|
||||
}
|
||||
done = 1;
|
||||
}
|
||||
} while (offset += len, (bh = bh->b_this_page) != head);
|
||||
|
||||
|
@ -876,7 +790,6 @@ xfs_cluster_write(
|
|||
struct xfs_bmbt_irec *imap,
|
||||
xfs_ioend_t **ioendp,
|
||||
struct writeback_control *wbc,
|
||||
int all_bh,
|
||||
pgoff_t tlast)
|
||||
{
|
||||
struct pagevec pvec;
|
||||
|
@ -891,7 +804,7 @@ xfs_cluster_write(
|
|||
|
||||
for (i = 0; i < pagevec_count(&pvec); i++) {
|
||||
done = xfs_convert_page(inode, pvec.pages[i], tindex++,
|
||||
imap, ioendp, wbc, all_bh);
|
||||
imap, ioendp, wbc);
|
||||
if (done)
|
||||
break;
|
||||
}
|
||||
|
@ -935,7 +848,7 @@ xfs_aops_discard_page(
|
|||
struct buffer_head *bh, *head;
|
||||
loff_t offset = page_offset(page);
|
||||
|
||||
if (!xfs_is_delayed_page(page, IO_DELAY))
|
||||
if (!xfs_is_delayed_page(page, IO_DELALLOC))
|
||||
goto out_invalidate;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
|
@ -1002,10 +915,10 @@ xfs_vm_writepage(
|
|||
unsigned int type;
|
||||
__uint64_t end_offset;
|
||||
pgoff_t end_index, last_index;
|
||||
ssize_t size, len;
|
||||
int flags, err, imap_valid = 0, uptodate = 1;
|
||||
ssize_t len;
|
||||
int err, imap_valid = 0, uptodate = 1;
|
||||
int count = 0;
|
||||
int all_bh = 0;
|
||||
int nonblocking = 0;
|
||||
|
||||
trace_xfs_writepage(inode, page, 0);
|
||||
|
||||
|
@ -1056,10 +969,14 @@ xfs_vm_writepage(
|
|||
|
||||
bh = head = page_buffers(page);
|
||||
offset = page_offset(page);
|
||||
flags = BMAPI_READ;
|
||||
type = IO_NEW;
|
||||
type = IO_OVERWRITE;
|
||||
|
||||
if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
|
||||
nonblocking = 1;
|
||||
|
||||
do {
|
||||
int new_ioend = 0;
|
||||
|
||||
if (offset >= end_offset)
|
||||
break;
|
||||
if (!buffer_uptodate(bh))
|
||||
|
@ -1076,90 +993,54 @@ xfs_vm_writepage(
|
|||
continue;
|
||||
}
|
||||
|
||||
if (imap_valid)
|
||||
imap_valid = xfs_imap_valid(inode, &imap, offset);
|
||||
|
||||
if (buffer_unwritten(bh) || buffer_delay(bh)) {
|
||||
int new_ioend = 0;
|
||||
|
||||
/*
|
||||
* Make sure we don't use a read-only iomap
|
||||
*/
|
||||
if (flags == BMAPI_READ)
|
||||
imap_valid = 0;
|
||||
|
||||
if (buffer_unwritten(bh)) {
|
||||
if (buffer_unwritten(bh)) {
|
||||
if (type != IO_UNWRITTEN) {
|
||||
type = IO_UNWRITTEN;
|
||||
flags = BMAPI_WRITE | BMAPI_IGNSTATE;
|
||||
} else if (buffer_delay(bh)) {
|
||||
type = IO_DELAY;
|
||||
flags = BMAPI_ALLOCATE;
|
||||
|
||||
if (wbc->sync_mode == WB_SYNC_NONE)
|
||||
flags |= BMAPI_TRYLOCK;
|
||||
imap_valid = 0;
|
||||
}
|
||||
|
||||
if (!imap_valid) {
|
||||
/*
|
||||
* If we didn't have a valid mapping then we
|
||||
* need to ensure that we put the new mapping
|
||||
* in a new ioend structure. This needs to be
|
||||
* done to ensure that the ioends correctly
|
||||
* reflect the block mappings at io completion
|
||||
* for unwritten extent conversion.
|
||||
*/
|
||||
new_ioend = 1;
|
||||
err = xfs_map_blocks(inode, offset, len,
|
||||
&imap, flags);
|
||||
if (err)
|
||||
goto error;
|
||||
imap_valid = xfs_imap_valid(inode, &imap,
|
||||
offset);
|
||||
}
|
||||
if (imap_valid) {
|
||||
xfs_map_at_offset(inode, bh, &imap, offset);
|
||||
xfs_add_to_ioend(inode, bh, offset, type,
|
||||
&ioend, new_ioend);
|
||||
count++;
|
||||
} else if (buffer_delay(bh)) {
|
||||
if (type != IO_DELALLOC) {
|
||||
type = IO_DELALLOC;
|
||||
imap_valid = 0;
|
||||
}
|
||||
} else if (buffer_uptodate(bh)) {
|
||||
/*
|
||||
* we got here because the buffer is already mapped.
|
||||
* That means it must already have extents allocated
|
||||
* underneath it. Map the extent by reading it.
|
||||
*/
|
||||
if (!imap_valid || flags != BMAPI_READ) {
|
||||
flags = BMAPI_READ;
|
||||
size = xfs_probe_cluster(inode, page, bh, head);
|
||||
err = xfs_map_blocks(inode, offset, size,
|
||||
&imap, flags);
|
||||
if (err)
|
||||
goto error;
|
||||
imap_valid = xfs_imap_valid(inode, &imap,
|
||||
offset);
|
||||
}
|
||||
|
||||
/*
|
||||
* We set the type to IO_NEW in case we are doing a
|
||||
* small write at EOF that is extending the file but
|
||||
* without needing an allocation. We need to update the
|
||||
* file size on I/O completion in this case so it is
|
||||
* the same case as having just allocated a new extent
|
||||
* that we are writing into for the first time.
|
||||
*/
|
||||
type = IO_NEW;
|
||||
if (trylock_buffer(bh)) {
|
||||
if (imap_valid)
|
||||
all_bh = 1;
|
||||
xfs_add_to_ioend(inode, bh, offset, type,
|
||||
&ioend, !imap_valid);
|
||||
count++;
|
||||
} else {
|
||||
if (type != IO_OVERWRITE) {
|
||||
type = IO_OVERWRITE;
|
||||
imap_valid = 0;
|
||||
}
|
||||
} else if (PageUptodate(page)) {
|
||||
ASSERT(buffer_mapped(bh));
|
||||
imap_valid = 0;
|
||||
} else {
|
||||
if (PageUptodate(page)) {
|
||||
ASSERT(buffer_mapped(bh));
|
||||
imap_valid = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (imap_valid)
|
||||
imap_valid = xfs_imap_valid(inode, &imap, offset);
|
||||
if (!imap_valid) {
|
||||
/*
|
||||
* If we didn't have a valid mapping then we need to
|
||||
* put the new mapping into a separate ioend structure.
|
||||
* This ensures non-contiguous extents always have
|
||||
* separate ioends, which is particularly important
|
||||
* for unwritten extent conversion at I/O completion
|
||||
* time.
|
||||
*/
|
||||
new_ioend = 1;
|
||||
err = xfs_map_blocks(inode, offset, &imap, type,
|
||||
nonblocking);
|
||||
if (err)
|
||||
goto error;
|
||||
imap_valid = xfs_imap_valid(inode, &imap, offset);
|
||||
}
|
||||
if (imap_valid) {
|
||||
lock_buffer(bh);
|
||||
if (type != IO_OVERWRITE)
|
||||
xfs_map_at_offset(inode, bh, &imap, offset);
|
||||
xfs_add_to_ioend(inode, bh, offset, type, &ioend,
|
||||
new_ioend);
|
||||
count++;
|
||||
}
|
||||
|
||||
if (!iohead)
|
||||
|
@ -1188,7 +1069,7 @@ xfs_vm_writepage(
|
|||
end_index = last_index;
|
||||
|
||||
xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
|
||||
wbc, all_bh, end_index);
|
||||
wbc, end_index);
|
||||
}
|
||||
|
||||
if (iohead)
|
||||
|
@ -1257,13 +1138,19 @@ __xfs_get_blocks(
|
|||
int create,
|
||||
int direct)
|
||||
{
|
||||
int flags = create ? BMAPI_WRITE : BMAPI_READ;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
xfs_fileoff_t offset_fsb, end_fsb;
|
||||
int error = 0;
|
||||
int lockmode = 0;
|
||||
struct xfs_bmbt_irec imap;
|
||||
int nimaps = 1;
|
||||
xfs_off_t offset;
|
||||
ssize_t size;
|
||||
int nimap = 1;
|
||||
int new = 0;
|
||||
int error;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return -XFS_ERROR(EIO);
|
||||
|
||||
offset = (xfs_off_t)iblock << inode->i_blkbits;
|
||||
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
|
||||
|
@ -1272,15 +1159,45 @@ __xfs_get_blocks(
|
|||
if (!create && direct && offset >= i_size_read(inode))
|
||||
return 0;
|
||||
|
||||
if (direct && create)
|
||||
flags |= BMAPI_DIRECT;
|
||||
if (create) {
|
||||
lockmode = XFS_ILOCK_EXCL;
|
||||
xfs_ilock(ip, lockmode);
|
||||
} else {
|
||||
lockmode = xfs_ilock_map_shared(ip);
|
||||
}
|
||||
|
||||
error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
|
||||
&new);
|
||||
ASSERT(offset <= mp->m_maxioffset);
|
||||
if (offset + size > mp->m_maxioffset)
|
||||
size = mp->m_maxioffset - offset;
|
||||
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
|
||||
offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
||||
|
||||
error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
|
||||
XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
|
||||
if (error)
|
||||
return -error;
|
||||
if (nimap == 0)
|
||||
return 0;
|
||||
goto out_unlock;
|
||||
|
||||
if (create &&
|
||||
(!nimaps ||
|
||||
(imap.br_startblock == HOLESTARTBLOCK ||
|
||||
imap.br_startblock == DELAYSTARTBLOCK))) {
|
||||
if (direct) {
|
||||
error = xfs_iomap_write_direct(ip, offset, size,
|
||||
&imap, nimaps);
|
||||
} else {
|
||||
error = xfs_iomap_write_delay(ip, offset, size, &imap);
|
||||
}
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
|
||||
} else if (nimaps) {
|
||||
trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
|
||||
} else {
|
||||
trace_xfs_get_blocks_notfound(ip, offset, size);
|
||||
goto out_unlock;
|
||||
}
|
||||
xfs_iunlock(ip, lockmode);
|
||||
|
||||
if (imap.br_startblock != HOLESTARTBLOCK &&
|
||||
imap.br_startblock != DELAYSTARTBLOCK) {
|
||||
|
@ -1347,6 +1264,10 @@ __xfs_get_blocks(
|
|||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return -error;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -1434,7 +1355,7 @@ xfs_vm_direct_IO(
|
|||
ssize_t ret;
|
||||
|
||||
if (rw & WRITE) {
|
||||
iocb->private = xfs_alloc_ioend(inode, IO_NEW);
|
||||
iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
|
||||
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
|
||||
offset, nr_segs,
|
||||
|
|
|
@ -22,6 +22,22 @@ extern struct workqueue_struct *xfsdatad_workqueue;
|
|||
extern struct workqueue_struct *xfsconvertd_workqueue;
|
||||
extern mempool_t *xfs_ioend_pool;
|
||||
|
||||
/*
|
||||
* Types of I/O for bmap clustering and I/O completion tracking.
|
||||
*/
|
||||
enum {
|
||||
IO_DIRECT = 0, /* special case for direct I/O ioends */
|
||||
IO_DELALLOC, /* mapping covers delalloc region */
|
||||
IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
|
||||
IO_OVERWRITE, /* mapping covers already allocated extent */
|
||||
};
|
||||
|
||||
#define XFS_IO_TYPES \
|
||||
{ 0, "" }, \
|
||||
{ IO_DELALLOC, "delalloc" }, \
|
||||
{ IO_UNWRITTEN, "unwritten" }, \
|
||||
{ IO_OVERWRITE, "overwrite" }
|
||||
|
||||
/*
|
||||
* xfs_ioend struct manages large extent writes for XFS.
|
||||
* It can manage several multi-page bio's at once.
|
||||
|
|
|
@ -44,12 +44,7 @@
|
|||
|
||||
static kmem_zone_t *xfs_buf_zone;
|
||||
STATIC int xfsbufd(void *);
|
||||
STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
|
||||
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
|
||||
static struct shrinker xfs_buf_shake = {
|
||||
.shrink = xfsbufd_wakeup,
|
||||
.seeks = DEFAULT_SEEKS,
|
||||
};
|
||||
|
||||
static struct workqueue_struct *xfslogd_workqueue;
|
||||
struct workqueue_struct *xfsdatad_workqueue;
|
||||
|
@ -168,8 +163,79 @@ test_page_region(
|
|||
}
|
||||
|
||||
/*
|
||||
* Internal xfs_buf_t object manipulation
|
||||
* xfs_buf_lru_add - add a buffer to the LRU.
|
||||
*
|
||||
* The LRU takes a new reference to the buffer so that it will only be freed
|
||||
* once the shrinker takes the buffer off the LRU.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_buf_lru_add(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
struct xfs_buftarg *btp = bp->b_target;
|
||||
|
||||
spin_lock(&btp->bt_lru_lock);
|
||||
if (list_empty(&bp->b_lru)) {
|
||||
atomic_inc(&bp->b_hold);
|
||||
list_add_tail(&bp->b_lru, &btp->bt_lru);
|
||||
btp->bt_lru_nr++;
|
||||
}
|
||||
spin_unlock(&btp->bt_lru_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_buf_lru_del - remove a buffer from the LRU
|
||||
*
|
||||
* The unlocked check is safe here because it only occurs when there are not
|
||||
* b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
|
||||
* to optimise the shrinker removing the buffer from the LRU and calling
|
||||
* xfs_buf_free(). i.e. it removes an unnecessary round trip on the
|
||||
* bt_lru_lock.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_buf_lru_del(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
struct xfs_buftarg *btp = bp->b_target;
|
||||
|
||||
if (list_empty(&bp->b_lru))
|
||||
return;
|
||||
|
||||
spin_lock(&btp->bt_lru_lock);
|
||||
if (!list_empty(&bp->b_lru)) {
|
||||
list_del_init(&bp->b_lru);
|
||||
btp->bt_lru_nr--;
|
||||
}
|
||||
spin_unlock(&btp->bt_lru_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* When we mark a buffer stale, we remove the buffer from the LRU and clear the
|
||||
* b_lru_ref count so that the buffer is freed immediately when the buffer
|
||||
* reference count falls to zero. If the buffer is already on the LRU, we need
|
||||
* to remove the reference that LRU holds on the buffer.
|
||||
*
|
||||
* This prevents build-up of stale buffers on the LRU.
|
||||
*/
|
||||
void
|
||||
xfs_buf_stale(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
bp->b_flags |= XBF_STALE;
|
||||
atomic_set(&(bp)->b_lru_ref, 0);
|
||||
if (!list_empty(&bp->b_lru)) {
|
||||
struct xfs_buftarg *btp = bp->b_target;
|
||||
|
||||
spin_lock(&btp->bt_lru_lock);
|
||||
if (!list_empty(&bp->b_lru)) {
|
||||
list_del_init(&bp->b_lru);
|
||||
btp->bt_lru_nr--;
|
||||
atomic_dec(&bp->b_hold);
|
||||
}
|
||||
spin_unlock(&btp->bt_lru_lock);
|
||||
}
|
||||
ASSERT(atomic_read(&bp->b_hold) >= 1);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
_xfs_buf_initialize(
|
||||
|
@ -186,7 +252,9 @@ _xfs_buf_initialize(
|
|||
|
||||
memset(bp, 0, sizeof(xfs_buf_t));
|
||||
atomic_set(&bp->b_hold, 1);
|
||||
atomic_set(&bp->b_lru_ref, 1);
|
||||
init_completion(&bp->b_iowait);
|
||||
INIT_LIST_HEAD(&bp->b_lru);
|
||||
INIT_LIST_HEAD(&bp->b_list);
|
||||
RB_CLEAR_NODE(&bp->b_rbnode);
|
||||
sema_init(&bp->b_sema, 0); /* held, no waiters */
|
||||
|
@ -262,6 +330,8 @@ xfs_buf_free(
|
|||
{
|
||||
trace_xfs_buf_free(bp, _RET_IP_);
|
||||
|
||||
ASSERT(list_empty(&bp->b_lru));
|
||||
|
||||
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
|
||||
uint i;
|
||||
|
||||
|
@ -337,7 +407,6 @@ _xfs_buf_lookup_pages(
|
|||
__func__, gfp_mask);
|
||||
|
||||
XFS_STATS_INC(xb_page_retries);
|
||||
xfsbufd_wakeup(NULL, 0, gfp_mask);
|
||||
congestion_wait(BLK_RW_ASYNC, HZ/50);
|
||||
goto retry;
|
||||
}
|
||||
|
@ -828,6 +897,7 @@ xfs_buf_rele(
|
|||
|
||||
if (!pag) {
|
||||
ASSERT(!bp->b_relse);
|
||||
ASSERT(list_empty(&bp->b_lru));
|
||||
ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
|
||||
if (atomic_dec_and_test(&bp->b_hold))
|
||||
xfs_buf_free(bp);
|
||||
|
@ -835,13 +905,19 @@ xfs_buf_rele(
|
|||
}
|
||||
|
||||
ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
|
||||
|
||||
ASSERT(atomic_read(&bp->b_hold) > 0);
|
||||
if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
|
||||
if (bp->b_relse) {
|
||||
atomic_inc(&bp->b_hold);
|
||||
spin_unlock(&pag->pag_buf_lock);
|
||||
bp->b_relse(bp);
|
||||
} else if (!(bp->b_flags & XBF_STALE) &&
|
||||
atomic_read(&bp->b_lru_ref)) {
|
||||
xfs_buf_lru_add(bp);
|
||||
spin_unlock(&pag->pag_buf_lock);
|
||||
} else {
|
||||
xfs_buf_lru_del(bp);
|
||||
ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
|
||||
rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
|
||||
spin_unlock(&pag->pag_buf_lock);
|
||||
|
@ -1438,51 +1514,84 @@ xfs_buf_iomove(
|
|||
*/
|
||||
|
||||
/*
|
||||
* Wait for any bufs with callbacks that have been submitted but
|
||||
* have not yet returned... walk the hash list for the target.
|
||||
* Wait for any bufs with callbacks that have been submitted but have not yet
|
||||
* returned. These buffers will have an elevated hold count, so wait on those
|
||||
* while freeing all the buffers only held by the LRU.
|
||||
*/
|
||||
void
|
||||
xfs_wait_buftarg(
|
||||
struct xfs_buftarg *btp)
|
||||
{
|
||||
struct xfs_perag *pag;
|
||||
uint i;
|
||||
struct xfs_buf *bp;
|
||||
|
||||
for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
|
||||
pag = xfs_perag_get(btp->bt_mount, i);
|
||||
spin_lock(&pag->pag_buf_lock);
|
||||
while (rb_first(&pag->pag_buf_tree)) {
|
||||
spin_unlock(&pag->pag_buf_lock);
|
||||
restart:
|
||||
spin_lock(&btp->bt_lru_lock);
|
||||
while (!list_empty(&btp->bt_lru)) {
|
||||
bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
|
||||
if (atomic_read(&bp->b_hold) > 1) {
|
||||
spin_unlock(&btp->bt_lru_lock);
|
||||
delay(100);
|
||||
spin_lock(&pag->pag_buf_lock);
|
||||
goto restart;
|
||||
}
|
||||
spin_unlock(&pag->pag_buf_lock);
|
||||
xfs_perag_put(pag);
|
||||
/*
|
||||
* clear the LRU reference count so the buffer doesn't get
|
||||
* ignored in xfs_buf_rele().
|
||||
*/
|
||||
atomic_set(&bp->b_lru_ref, 0);
|
||||
spin_unlock(&btp->bt_lru_lock);
|
||||
xfs_buf_rele(bp);
|
||||
spin_lock(&btp->bt_lru_lock);
|
||||
}
|
||||
spin_unlock(&btp->bt_lru_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* buftarg list for delwrite queue processing
|
||||
*/
|
||||
static LIST_HEAD(xfs_buftarg_list);
|
||||
static DEFINE_SPINLOCK(xfs_buftarg_lock);
|
||||
|
||||
STATIC void
|
||||
xfs_register_buftarg(
|
||||
xfs_buftarg_t *btp)
|
||||
int
|
||||
xfs_buftarg_shrink(
|
||||
struct shrinker *shrink,
|
||||
int nr_to_scan,
|
||||
gfp_t mask)
|
||||
{
|
||||
spin_lock(&xfs_buftarg_lock);
|
||||
list_add(&btp->bt_list, &xfs_buftarg_list);
|
||||
spin_unlock(&xfs_buftarg_lock);
|
||||
}
|
||||
struct xfs_buftarg *btp = container_of(shrink,
|
||||
struct xfs_buftarg, bt_shrinker);
|
||||
struct xfs_buf *bp;
|
||||
LIST_HEAD(dispose);
|
||||
|
||||
STATIC void
|
||||
xfs_unregister_buftarg(
|
||||
xfs_buftarg_t *btp)
|
||||
{
|
||||
spin_lock(&xfs_buftarg_lock);
|
||||
list_del(&btp->bt_list);
|
||||
spin_unlock(&xfs_buftarg_lock);
|
||||
if (!nr_to_scan)
|
||||
return btp->bt_lru_nr;
|
||||
|
||||
spin_lock(&btp->bt_lru_lock);
|
||||
while (!list_empty(&btp->bt_lru)) {
|
||||
if (nr_to_scan-- <= 0)
|
||||
break;
|
||||
|
||||
bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
|
||||
|
||||
/*
|
||||
* Decrement the b_lru_ref count unless the value is already
|
||||
* zero. If the value is already zero, we need to reclaim the
|
||||
* buffer, otherwise it gets another trip through the LRU.
|
||||
*/
|
||||
if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
|
||||
list_move_tail(&bp->b_lru, &btp->bt_lru);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* remove the buffer from the LRU now to avoid needing another
|
||||
* lock round trip inside xfs_buf_rele().
|
||||
*/
|
||||
list_move(&bp->b_lru, &dispose);
|
||||
btp->bt_lru_nr--;
|
||||
}
|
||||
spin_unlock(&btp->bt_lru_lock);
|
||||
|
||||
while (!list_empty(&dispose)) {
|
||||
bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
|
||||
list_del_init(&bp->b_lru);
|
||||
xfs_buf_rele(bp);
|
||||
}
|
||||
|
||||
return btp->bt_lru_nr;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1490,17 +1599,14 @@ xfs_free_buftarg(
|
|||
struct xfs_mount *mp,
|
||||
struct xfs_buftarg *btp)
|
||||
{
|
||||
unregister_shrinker(&btp->bt_shrinker);
|
||||
|
||||
xfs_flush_buftarg(btp, 1);
|
||||
if (mp->m_flags & XFS_MOUNT_BARRIER)
|
||||
xfs_blkdev_issue_flush(btp);
|
||||
iput(btp->bt_mapping->host);
|
||||
|
||||
/* Unregister the buftarg first so that we don't get a
|
||||
* wakeup finding a non-existent task
|
||||
*/
|
||||
xfs_unregister_buftarg(btp);
|
||||
kthread_stop(btp->bt_task);
|
||||
|
||||
kmem_free(btp);
|
||||
}
|
||||
|
||||
|
@ -1597,20 +1703,13 @@ xfs_alloc_delwrite_queue(
|
|||
xfs_buftarg_t *btp,
|
||||
const char *fsname)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
INIT_LIST_HEAD(&btp->bt_list);
|
||||
INIT_LIST_HEAD(&btp->bt_delwrite_queue);
|
||||
spin_lock_init(&btp->bt_delwrite_lock);
|
||||
btp->bt_flags = 0;
|
||||
btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
|
||||
if (IS_ERR(btp->bt_task)) {
|
||||
error = PTR_ERR(btp->bt_task);
|
||||
goto out_error;
|
||||
}
|
||||
xfs_register_buftarg(btp);
|
||||
out_error:
|
||||
return error;
|
||||
if (IS_ERR(btp->bt_task))
|
||||
return PTR_ERR(btp->bt_task);
|
||||
return 0;
|
||||
}
|
||||
|
||||
xfs_buftarg_t *
|
||||
|
@ -1627,12 +1726,17 @@ xfs_alloc_buftarg(
|
|||
btp->bt_mount = mp;
|
||||
btp->bt_dev = bdev->bd_dev;
|
||||
btp->bt_bdev = bdev;
|
||||
INIT_LIST_HEAD(&btp->bt_lru);
|
||||
spin_lock_init(&btp->bt_lru_lock);
|
||||
if (xfs_setsize_buftarg_early(btp, bdev))
|
||||
goto error;
|
||||
if (xfs_mapping_buftarg(btp, bdev))
|
||||
goto error;
|
||||
if (xfs_alloc_delwrite_queue(btp, fsname))
|
||||
goto error;
|
||||
btp->bt_shrinker.shrink = xfs_buftarg_shrink;
|
||||
btp->bt_shrinker.seeks = DEFAULT_SEEKS;
|
||||
register_shrinker(&btp->bt_shrinker);
|
||||
return btp;
|
||||
|
||||
error:
|
||||
|
@ -1737,27 +1841,6 @@ xfs_buf_runall_queues(
|
|||
flush_workqueue(queue);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfsbufd_wakeup(
|
||||
struct shrinker *shrink,
|
||||
int priority,
|
||||
gfp_t mask)
|
||||
{
|
||||
xfs_buftarg_t *btp;
|
||||
|
||||
spin_lock(&xfs_buftarg_lock);
|
||||
list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
|
||||
if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
|
||||
continue;
|
||||
if (list_empty(&btp->bt_delwrite_queue))
|
||||
continue;
|
||||
set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
|
||||
wake_up_process(btp->bt_task);
|
||||
}
|
||||
spin_unlock(&xfs_buftarg_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move as many buffers as specified to the supplied list
|
||||
* indicating if we skipped any buffers to prevent deadlocks.
|
||||
|
@ -1952,7 +2035,6 @@ xfs_buf_init(void)
|
|||
if (!xfsconvertd_workqueue)
|
||||
goto out_destroy_xfsdatad_workqueue;
|
||||
|
||||
register_shrinker(&xfs_buf_shake);
|
||||
return 0;
|
||||
|
||||
out_destroy_xfsdatad_workqueue:
|
||||
|
@ -1968,7 +2050,6 @@ xfs_buf_init(void)
|
|||
void
|
||||
xfs_buf_terminate(void)
|
||||
{
|
||||
unregister_shrinker(&xfs_buf_shake);
|
||||
destroy_workqueue(xfsconvertd_workqueue);
|
||||
destroy_workqueue(xfsdatad_workqueue);
|
||||
destroy_workqueue(xfslogd_workqueue);
|
||||
|
|
|
@ -128,10 +128,15 @@ typedef struct xfs_buftarg {
|
|||
|
||||
/* per device delwri queue */
|
||||
struct task_struct *bt_task;
|
||||
struct list_head bt_list;
|
||||
struct list_head bt_delwrite_queue;
|
||||
spinlock_t bt_delwrite_lock;
|
||||
unsigned long bt_flags;
|
||||
|
||||
/* LRU control structures */
|
||||
struct shrinker bt_shrinker;
|
||||
struct list_head bt_lru;
|
||||
spinlock_t bt_lru_lock;
|
||||
unsigned int bt_lru_nr;
|
||||
} xfs_buftarg_t;
|
||||
|
||||
/*
|
||||
|
@ -164,9 +169,11 @@ typedef struct xfs_buf {
|
|||
xfs_off_t b_file_offset; /* offset in file */
|
||||
size_t b_buffer_length;/* size of buffer in bytes */
|
||||
atomic_t b_hold; /* reference count */
|
||||
atomic_t b_lru_ref; /* lru reclaim ref count */
|
||||
xfs_buf_flags_t b_flags; /* status flags */
|
||||
struct semaphore b_sema; /* semaphore for lockables */
|
||||
|
||||
struct list_head b_lru; /* lru list */
|
||||
wait_queue_head_t b_waiters; /* unpin waiters */
|
||||
struct list_head b_list;
|
||||
struct xfs_perag *b_pag; /* contains rbtree root */
|
||||
|
@ -264,7 +271,8 @@ extern void xfs_buf_terminate(void);
|
|||
#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
|
||||
~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
|
||||
|
||||
#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE)
|
||||
void xfs_buf_stale(struct xfs_buf *bp);
|
||||
#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
|
||||
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
|
||||
#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
|
||||
#define XFS_BUF_SUPER_STALE(bp) do { \
|
||||
|
@ -328,9 +336,15 @@ extern void xfs_buf_terminate(void);
|
|||
#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
|
||||
#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
|
||||
|
||||
#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)
|
||||
static inline void
|
||||
xfs_buf_set_ref(
|
||||
struct xfs_buf *bp,
|
||||
int lru_ref)
|
||||
{
|
||||
atomic_set(&bp->b_lru_ref, lru_ref);
|
||||
}
|
||||
#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
|
||||
#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
|
||||
#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
|
||||
|
||||
#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
|
||||
|
||||
|
|
|
@ -70,8 +70,16 @@ xfs_fs_encode_fh(
|
|||
else
|
||||
fileid_type = FILEID_INO32_GEN_PARENT;
|
||||
|
||||
/* filesystem may contain 64bit inode numbers */
|
||||
if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS))
|
||||
/*
|
||||
* If the filesystem may contain 64bit inode numbers, we need
|
||||
* to use larger file handles that can represent them.
|
||||
*
|
||||
* While we only allocate inodes that do not fit into 32 bits any
|
||||
* large enough filesystem may contain them, thus the slightly
|
||||
* confusing looking conditional below.
|
||||
*/
|
||||
if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
|
||||
(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
|
||||
fileid_type |= XFS_FILEID_TYPE_64FLAG;
|
||||
|
||||
/*
|
||||
|
|
|
@ -37,7 +37,6 @@
|
|||
|
||||
#include <kmem.h>
|
||||
#include <mrlock.h>
|
||||
#include <sv.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <support/debug.h>
|
||||
|
|
|
@ -834,8 +834,11 @@ xfsaild_wakeup(
|
|||
struct xfs_ail *ailp,
|
||||
xfs_lsn_t threshold_lsn)
|
||||
{
|
||||
ailp->xa_target = threshold_lsn;
|
||||
wake_up_process(ailp->xa_task);
|
||||
/* only ever move the target forwards */
|
||||
if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
|
||||
ailp->xa_target = threshold_lsn;
|
||||
wake_up_process(ailp->xa_task);
|
||||
}
|
||||
}
|
||||
|
||||
STATIC int
|
||||
|
@ -847,8 +850,17 @@ xfsaild(
|
|||
long tout = 0; /* milliseconds */
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
schedule_timeout_interruptible(tout ?
|
||||
msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
|
||||
/*
|
||||
* for short sleeps indicating congestion, don't allow us to
|
||||
* get woken early. Otherwise all we do is bang on the AIL lock
|
||||
* without making progress.
|
||||
*/
|
||||
if (tout && tout <= 20)
|
||||
__set_current_state(TASK_KILLABLE);
|
||||
else
|
||||
__set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule_timeout(tout ?
|
||||
msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
|
||||
|
||||
/* swsusp */
|
||||
try_to_freeze();
|
||||
|
@ -1118,6 +1130,8 @@ xfs_fs_evict_inode(
|
|||
*/
|
||||
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
|
||||
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
|
||||
lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
|
||||
&xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
|
||||
|
||||
xfs_inactive(ip);
|
||||
}
|
||||
|
|
|
@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab(
|
|||
{
|
||||
struct inode *inode = VFS_I(ip);
|
||||
|
||||
ASSERT(rcu_read_lock_held());
|
||||
|
||||
/*
|
||||
* check for stale RCU freed inode
|
||||
*
|
||||
* If the inode has been reallocated, it doesn't matter if it's not in
|
||||
* the AG we are walking - we are walking for writeback, so if it
|
||||
* passes all the "valid inode" checks and is dirty, then we'll write
|
||||
* it back anyway. If it has been reallocated and still being
|
||||
* initialised, the XFS_INEW check below will catch it.
|
||||
*/
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
if (!ip->i_ino)
|
||||
goto out_unlock_noent;
|
||||
|
||||
/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
|
||||
if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
|
||||
goto out_unlock_noent;
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
|
||||
/* nothing to sync during shutdown */
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return EFSCORRUPTED;
|
||||
|
||||
/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
|
||||
if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
|
||||
return ENOENT;
|
||||
|
||||
/* If we can't grab the inode, it must be on its way to reclaim. */
|
||||
if (!igrab(inode))
|
||||
return ENOENT;
|
||||
|
@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab(
|
|||
|
||||
/* inode is valid */
|
||||
return 0;
|
||||
|
||||
out_unlock_noent:
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
return ENOENT;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
|
@ -98,12 +118,12 @@ xfs_inode_ag_walk(
|
|||
int error = 0;
|
||||
int i;
|
||||
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
rcu_read_lock();
|
||||
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
|
||||
(void **)batch, first_index,
|
||||
XFS_LOOKUP_BATCH);
|
||||
if (!nr_found) {
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -118,18 +138,26 @@ xfs_inode_ag_walk(
|
|||
batch[i] = NULL;
|
||||
|
||||
/*
|
||||
* Update the index for the next lookup. Catch overflows
|
||||
* into the next AG range which can occur if we have inodes
|
||||
* in the last block of the AG and we are currently
|
||||
* pointing to the last inode.
|
||||
* Update the index for the next lookup. Catch
|
||||
* overflows into the next AG range which can occur if
|
||||
* we have inodes in the last block of the AG and we
|
||||
* are currently pointing to the last inode.
|
||||
*
|
||||
* Because we may see inodes that are from the wrong AG
|
||||
* due to RCU freeing and reallocation, only update the
|
||||
* index if it lies in this AG. It was a race that lead
|
||||
* us to see this inode, so another lookup from the
|
||||
* same index will not find it again.
|
||||
*/
|
||||
if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
|
||||
continue;
|
||||
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
|
||||
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
|
||||
done = 1;
|
||||
}
|
||||
|
||||
/* unlock now we've grabbed the inodes. */
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
for (i = 0; i < nr_found; i++) {
|
||||
if (!batch[i])
|
||||
|
@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag(
|
|||
struct xfs_perag *pag;
|
||||
|
||||
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
|
||||
write_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
__xfs_inode_set_reclaim_tag(pag, ip);
|
||||
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
spin_unlock(&pag->pag_ici_lock);
|
||||
xfs_perag_put(pag);
|
||||
}
|
||||
|
||||
|
@ -639,9 +667,14 @@ xfs_reclaim_inode_grab(
|
|||
struct xfs_inode *ip,
|
||||
int flags)
|
||||
{
|
||||
ASSERT(rcu_read_lock_held());
|
||||
|
||||
/* quick check for stale RCU freed inode */
|
||||
if (!ip->i_ino)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* do some unlocked checks first to avoid unnecceary lock traffic.
|
||||
* do some unlocked checks first to avoid unnecessary lock traffic.
|
||||
* The first is a flush lock check, the second is a already in reclaim
|
||||
* check. Only do these checks if we are not going to block on locks.
|
||||
*/
|
||||
|
@ -654,11 +687,16 @@ xfs_reclaim_inode_grab(
|
|||
* The radix tree lock here protects a thread in xfs_iget from racing
|
||||
* with us starting reclaim on the inode. Once we have the
|
||||
* XFS_IRECLAIM flag set it will not touch us.
|
||||
*
|
||||
* Due to RCU lookup, we may find inodes that have been freed and only
|
||||
* have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
|
||||
* aren't candidates for reclaim at all, so we must check the
|
||||
* XFS_IRECLAIMABLE is set first before proceeding to reclaim.
|
||||
*/
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
|
||||
if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
|
||||
/* ignore as it is already under reclaim */
|
||||
if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
|
||||
__xfs_iflags_test(ip, XFS_IRECLAIM)) {
|
||||
/* not a reclaim candidate. */
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
return 1;
|
||||
}
|
||||
|
@ -795,12 +833,12 @@ xfs_reclaim_inode(
|
|||
* added to the tree assert that it's been there before to catch
|
||||
* problems with the inode life time early on.
|
||||
*/
|
||||
write_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&pag->pag_ici_lock);
|
||||
if (!radix_tree_delete(&pag->pag_ici_root,
|
||||
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
|
||||
ASSERT(0);
|
||||
__xfs_inode_clear_reclaim(pag, ip);
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
spin_unlock(&pag->pag_ici_lock);
|
||||
|
||||
/*
|
||||
* Here we do an (almost) spurious inode lock in order to coordinate
|
||||
|
@ -864,14 +902,14 @@ xfs_reclaim_inodes_ag(
|
|||
struct xfs_inode *batch[XFS_LOOKUP_BATCH];
|
||||
int i;
|
||||
|
||||
write_lock(&pag->pag_ici_lock);
|
||||
rcu_read_lock();
|
||||
nr_found = radix_tree_gang_lookup_tag(
|
||||
&pag->pag_ici_root,
|
||||
(void **)batch, first_index,
|
||||
XFS_LOOKUP_BATCH,
|
||||
XFS_ICI_RECLAIM_TAG);
|
||||
if (!nr_found) {
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -891,14 +929,24 @@ xfs_reclaim_inodes_ag(
|
|||
* occur if we have inodes in the last block of
|
||||
* the AG and we are currently pointing to the
|
||||
* last inode.
|
||||
*
|
||||
* Because we may see inodes that are from the
|
||||
* wrong AG due to RCU freeing and
|
||||
* reallocation, only update the index if it
|
||||
* lies in this AG. It was a race that lead us
|
||||
* to see this inode, so another lookup from
|
||||
* the same index will not find it again.
|
||||
*/
|
||||
if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
|
||||
pag->pag_agno)
|
||||
continue;
|
||||
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
|
||||
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
|
||||
done = 1;
|
||||
}
|
||||
|
||||
/* unlock now we've grabbed the inodes. */
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
for (i = 0; i < nr_found; i++) {
|
||||
if (!batch[i])
|
||||
|
|
|
@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
|
|||
__field(int, curr_res)
|
||||
__field(int, unit_res)
|
||||
__field(unsigned int, flags)
|
||||
__field(void *, reserve_headq)
|
||||
__field(void *, write_headq)
|
||||
__field(int, reserveq)
|
||||
__field(int, writeq)
|
||||
__field(int, grant_reserve_cycle)
|
||||
__field(int, grant_reserve_bytes)
|
||||
__field(int, grant_write_cycle)
|
||||
|
@ -784,19 +784,21 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
|
|||
__entry->curr_res = tic->t_curr_res;
|
||||
__entry->unit_res = tic->t_unit_res;
|
||||
__entry->flags = tic->t_flags;
|
||||
__entry->reserve_headq = log->l_reserve_headq;
|
||||
__entry->write_headq = log->l_write_headq;
|
||||
__entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
|
||||
__entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
|
||||
__entry->grant_write_cycle = log->l_grant_write_cycle;
|
||||
__entry->grant_write_bytes = log->l_grant_write_bytes;
|
||||
__entry->reserveq = list_empty(&log->l_reserveq);
|
||||
__entry->writeq = list_empty(&log->l_writeq);
|
||||
xlog_crack_grant_head(&log->l_grant_reserve_head,
|
||||
&__entry->grant_reserve_cycle,
|
||||
&__entry->grant_reserve_bytes);
|
||||
xlog_crack_grant_head(&log->l_grant_write_head,
|
||||
&__entry->grant_write_cycle,
|
||||
&__entry->grant_write_bytes);
|
||||
__entry->curr_cycle = log->l_curr_cycle;
|
||||
__entry->curr_block = log->l_curr_block;
|
||||
__entry->tail_lsn = log->l_tail_lsn;
|
||||
__entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
|
||||
),
|
||||
TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
|
||||
"t_unit_res %u t_flags %s reserve_headq 0x%p "
|
||||
"write_headq 0x%p grant_reserve_cycle %d "
|
||||
"t_unit_res %u t_flags %s reserveq %s "
|
||||
"writeq %s grant_reserve_cycle %d "
|
||||
"grant_reserve_bytes %d grant_write_cycle %d "
|
||||
"grant_write_bytes %d curr_cycle %d curr_block %d "
|
||||
"tail_cycle %d tail_block %d",
|
||||
|
@ -807,8 +809,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
|
|||
__entry->curr_res,
|
||||
__entry->unit_res,
|
||||
__print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
|
||||
__entry->reserve_headq,
|
||||
__entry->write_headq,
|
||||
__entry->reserveq ? "empty" : "active",
|
||||
__entry->writeq ? "empty" : "active",
|
||||
__entry->grant_reserve_cycle,
|
||||
__entry->grant_reserve_bytes,
|
||||
__entry->grant_write_cycle,
|
||||
|
@ -835,6 +837,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
|
|||
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
|
||||
|
@ -842,6 +845,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
|
|||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
|
||||
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
|
||||
|
@ -935,10 +939,10 @@ DEFINE_PAGE_EVENT(xfs_writepage);
|
|||
DEFINE_PAGE_EVENT(xfs_releasepage);
|
||||
DEFINE_PAGE_EVENT(xfs_invalidatepage);
|
||||
|
||||
DECLARE_EVENT_CLASS(xfs_iomap_class,
|
||||
DECLARE_EVENT_CLASS(xfs_imap_class,
|
||||
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
|
||||
int flags, struct xfs_bmbt_irec *irec),
|
||||
TP_ARGS(ip, offset, count, flags, irec),
|
||||
int type, struct xfs_bmbt_irec *irec),
|
||||
TP_ARGS(ip, offset, count, type, irec),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_ino_t, ino)
|
||||
|
@ -946,7 +950,7 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
|
|||
__field(loff_t, new_size)
|
||||
__field(loff_t, offset)
|
||||
__field(size_t, count)
|
||||
__field(int, flags)
|
||||
__field(int, type)
|
||||
__field(xfs_fileoff_t, startoff)
|
||||
__field(xfs_fsblock_t, startblock)
|
||||
__field(xfs_filblks_t, blockcount)
|
||||
|
@ -958,13 +962,13 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
|
|||
__entry->new_size = ip->i_new_size;
|
||||
__entry->offset = offset;
|
||||
__entry->count = count;
|
||||
__entry->flags = flags;
|
||||
__entry->type = type;
|
||||
__entry->startoff = irec ? irec->br_startoff : 0;
|
||||
__entry->startblock = irec ? irec->br_startblock : 0;
|
||||
__entry->blockcount = irec ? irec->br_blockcount : 0;
|
||||
),
|
||||
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
|
||||
"offset 0x%llx count %zd flags %s "
|
||||
"offset 0x%llx count %zd type %s "
|
||||
"startoff 0x%llx startblock %lld blockcount 0x%llx",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->ino,
|
||||
|
@ -972,20 +976,21 @@ DECLARE_EVENT_CLASS(xfs_iomap_class,
|
|||
__entry->new_size,
|
||||
__entry->offset,
|
||||
__entry->count,
|
||||
__print_flags(__entry->flags, "|", BMAPI_FLAGS),
|
||||
__print_symbolic(__entry->type, XFS_IO_TYPES),
|
||||
__entry->startoff,
|
||||
(__int64_t)__entry->startblock,
|
||||
__entry->blockcount)
|
||||
)
|
||||
|
||||
#define DEFINE_IOMAP_EVENT(name) \
|
||||
DEFINE_EVENT(xfs_iomap_class, name, \
|
||||
DEFINE_EVENT(xfs_imap_class, name, \
|
||||
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
|
||||
int flags, struct xfs_bmbt_irec *irec), \
|
||||
TP_ARGS(ip, offset, count, flags, irec))
|
||||
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
|
||||
DEFINE_IOMAP_EVENT(xfs_iomap_found);
|
||||
DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
|
||||
int type, struct xfs_bmbt_irec *irec), \
|
||||
TP_ARGS(ip, offset, count, type, irec))
|
||||
DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
|
||||
DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
|
||||
DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
|
||||
DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
|
||||
|
||||
DECLARE_EVENT_CLASS(xfs_simple_io_class,
|
||||
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
|
||||
|
@ -1022,6 +1027,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
|
|||
TP_ARGS(ip, offset, count))
|
||||
DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
|
||||
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
|
||||
DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
|
||||
|
||||
|
||||
TRACE_EVENT(xfs_itruncate_start,
|
||||
|
@ -1420,6 +1426,7 @@ DEFINE_EVENT(xfs_alloc_class, name, \
|
|||
TP_PROTO(struct xfs_alloc_arg *args), \
|
||||
TP_ARGS(args))
|
||||
DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
|
||||
DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
|
||||
DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
|
||||
DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
|
||||
DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
|
||||
|
|
|
@ -149,7 +149,6 @@ xfs_qm_dqdestroy(
|
|||
ASSERT(list_empty(&dqp->q_freelist));
|
||||
|
||||
mutex_destroy(&dqp->q_qlock);
|
||||
sv_destroy(&dqp->q_pinwait);
|
||||
kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
|
||||
|
||||
atomic_dec(&xfs_Gqm->qm_totaldquots);
|
||||
|
|
|
@ -227,7 +227,7 @@ typedef struct xfs_perag {
|
|||
|
||||
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
|
||||
|
||||
rwlock_t pag_ici_lock; /* incore inode lock */
|
||||
spinlock_t pag_ici_lock; /* incore inode cache lock */
|
||||
struct radix_tree_root pag_ici_root; /* incore inode cache root */
|
||||
int pag_ici_reclaimable; /* reclaimable inodes */
|
||||
struct mutex pag_ici_reclaim_lock; /* serialisation point */
|
||||
|
|
|
@ -577,61 +577,58 @@ xfs_alloc_ag_vextent_exact(
|
|||
xfs_extlen_t rlen; /* length of returned extent */
|
||||
|
||||
ASSERT(args->alignment == 1);
|
||||
|
||||
/*
|
||||
* Allocate/initialize a cursor for the by-number freespace btree.
|
||||
*/
|
||||
bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
|
||||
args->agno, XFS_BTNUM_BNO);
|
||||
args->agno, XFS_BTNUM_BNO);
|
||||
|
||||
/*
|
||||
* Lookup bno and minlen in the btree (minlen is irrelevant, really).
|
||||
* Look for the closest free block <= bno, it must contain bno
|
||||
* if any free block does.
|
||||
*/
|
||||
if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
|
||||
error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
|
||||
if (error)
|
||||
goto error0;
|
||||
if (!i) {
|
||||
/*
|
||||
* Didn't find it, return null.
|
||||
*/
|
||||
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
|
||||
args->agbno = NULLAGBLOCK;
|
||||
return 0;
|
||||
}
|
||||
if (!i)
|
||||
goto not_found;
|
||||
|
||||
/*
|
||||
* Grab the freespace record.
|
||||
*/
|
||||
if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
|
||||
error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
|
||||
if (error)
|
||||
goto error0;
|
||||
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
|
||||
ASSERT(fbno <= args->agbno);
|
||||
minend = args->agbno + args->minlen;
|
||||
maxend = args->agbno + args->maxlen;
|
||||
fend = fbno + flen;
|
||||
|
||||
/*
|
||||
* Give up if the freespace isn't long enough for the minimum request.
|
||||
*/
|
||||
if (fend < minend) {
|
||||
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
|
||||
args->agbno = NULLAGBLOCK;
|
||||
return 0;
|
||||
}
|
||||
if (fend < minend)
|
||||
goto not_found;
|
||||
|
||||
/*
|
||||
* End of extent will be smaller of the freespace end and the
|
||||
* maximal requested end.
|
||||
*/
|
||||
end = XFS_AGBLOCK_MIN(fend, maxend);
|
||||
/*
|
||||
*
|
||||
* Fix the length according to mod and prod if given.
|
||||
*/
|
||||
end = XFS_AGBLOCK_MIN(fend, maxend);
|
||||
args->len = end - args->agbno;
|
||||
xfs_alloc_fix_len(args);
|
||||
if (!xfs_alloc_fix_minleft(args)) {
|
||||
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
|
||||
return 0;
|
||||
}
|
||||
if (!xfs_alloc_fix_minleft(args))
|
||||
goto not_found;
|
||||
|
||||
rlen = args->len;
|
||||
ASSERT(args->agbno + rlen <= fend);
|
||||
end = args->agbno + rlen;
|
||||
|
||||
/*
|
||||
* We are allocating agbno for rlen [agbno .. end]
|
||||
* Allocate/initialize a cursor for the by-size btree.
|
||||
|
@ -640,16 +637,25 @@ xfs_alloc_ag_vextent_exact(
|
|||
args->agno, XFS_BTNUM_CNT);
|
||||
ASSERT(args->agbno + args->len <=
|
||||
be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
|
||||
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
|
||||
args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
|
||||
error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
|
||||
args->len, XFSA_FIXUP_BNO_OK);
|
||||
if (error) {
|
||||
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
|
||||
goto error0;
|
||||
}
|
||||
|
||||
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
|
||||
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
||||
|
||||
trace_xfs_alloc_exact_done(args);
|
||||
args->wasfromfl = 0;
|
||||
trace_xfs_alloc_exact_done(args);
|
||||
return 0;
|
||||
|
||||
not_found:
|
||||
/* Didn't find it, return null. */
|
||||
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
|
||||
args->agbno = NULLAGBLOCK;
|
||||
trace_xfs_alloc_exact_notfound(args);
|
||||
return 0;
|
||||
|
||||
error0:
|
||||
|
@ -658,6 +664,95 @@ xfs_alloc_ag_vextent_exact(
|
|||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search the btree in a given direction via the search cursor and compare
|
||||
* the records found against the good extent we've already found.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_alloc_find_best_extent(
|
||||
struct xfs_alloc_arg *args, /* allocation argument structure */
|
||||
struct xfs_btree_cur **gcur, /* good cursor */
|
||||
struct xfs_btree_cur **scur, /* searching cursor */
|
||||
xfs_agblock_t gdiff, /* difference for search comparison */
|
||||
xfs_agblock_t *sbno, /* extent found by search */
|
||||
xfs_extlen_t *slen,
|
||||
xfs_extlen_t *slena, /* aligned length */
|
||||
int dir) /* 0 = search right, 1 = search left */
|
||||
{
|
||||
xfs_agblock_t bno;
|
||||
xfs_agblock_t new;
|
||||
xfs_agblock_t sdiff;
|
||||
int error;
|
||||
int i;
|
||||
|
||||
/* The good extent is perfect, no need to search. */
|
||||
if (!gdiff)
|
||||
goto out_use_good;
|
||||
|
||||
/*
|
||||
* Look until we find a better one, run out of space or run off the end.
|
||||
*/
|
||||
do {
|
||||
error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
|
||||
if (error)
|
||||
goto error0;
|
||||
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
|
||||
xfs_alloc_compute_aligned(*sbno, *slen, args->alignment,
|
||||
args->minlen, &bno, slena);
|
||||
|
||||
/*
|
||||
* The good extent is closer than this one.
|
||||
*/
|
||||
if (!dir) {
|
||||
if (bno >= args->agbno + gdiff)
|
||||
goto out_use_good;
|
||||
} else {
|
||||
if (bno <= args->agbno - gdiff)
|
||||
goto out_use_good;
|
||||
}
|
||||
|
||||
/*
|
||||
* Same distance, compare length and pick the best.
|
||||
*/
|
||||
if (*slena >= args->minlen) {
|
||||
args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
|
||||
xfs_alloc_fix_len(args);
|
||||
|
||||
sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
||||
args->alignment, *sbno,
|
||||
*slen, &new);
|
||||
|
||||
/*
|
||||
* Choose closer size and invalidate other cursor.
|
||||
*/
|
||||
if (sdiff < gdiff)
|
||||
goto out_use_search;
|
||||
goto out_use_good;
|
||||
}
|
||||
|
||||
if (!dir)
|
||||
error = xfs_btree_increment(*scur, 0, &i);
|
||||
else
|
||||
error = xfs_btree_decrement(*scur, 0, &i);
|
||||
if (error)
|
||||
goto error0;
|
||||
} while (i);
|
||||
|
||||
out_use_good:
|
||||
xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
|
||||
*scur = NULL;
|
||||
return 0;
|
||||
|
||||
out_use_search:
|
||||
xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
|
||||
*gcur = NULL;
|
||||
return 0;
|
||||
|
||||
error0:
|
||||
/* caller invalidates cursors */
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a variable extent near bno in the allocation group agno.
|
||||
* Extent's length (returned in len) will be between minlen and maxlen,
|
||||
|
@ -925,203 +1020,45 @@ xfs_alloc_ag_vextent_near(
|
|||
}
|
||||
}
|
||||
} while (bno_cur_lt || bno_cur_gt);
|
||||
|
||||
/*
|
||||
* Got both cursors still active, need to find better entry.
|
||||
*/
|
||||
if (bno_cur_lt && bno_cur_gt) {
|
||||
/*
|
||||
* Left side is long enough, look for a right side entry.
|
||||
*/
|
||||
if (ltlena >= args->minlen) {
|
||||
/*
|
||||
* Fix up the length.
|
||||
* Left side is good, look for a right side entry.
|
||||
*/
|
||||
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
|
||||
xfs_alloc_fix_len(args);
|
||||
rlen = args->len;
|
||||
ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
|
||||
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
||||
args->alignment, ltbno, ltlen, &ltnew);
|
||||
|
||||
error = xfs_alloc_find_best_extent(args,
|
||||
&bno_cur_lt, &bno_cur_gt,
|
||||
ltdiff, &gtbno, &gtlen, &gtlena,
|
||||
0 /* search right */);
|
||||
} else {
|
||||
ASSERT(gtlena >= args->minlen);
|
||||
|
||||
/*
|
||||
* Not perfect.
|
||||
*/
|
||||
if (ltdiff) {
|
||||
/*
|
||||
* Look until we find a better one, run out of
|
||||
* space, or run off the end.
|
||||
*/
|
||||
while (bno_cur_lt && bno_cur_gt) {
|
||||
if ((error = xfs_alloc_get_rec(
|
||||
bno_cur_gt, &gtbno,
|
||||
&gtlen, &i)))
|
||||
goto error0;
|
||||
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
|
||||
xfs_alloc_compute_aligned(gtbno, gtlen,
|
||||
args->alignment, args->minlen,
|
||||
&gtbnoa, &gtlena);
|
||||
/*
|
||||
* The left one is clearly better.
|
||||
*/
|
||||
if (gtbnoa >= args->agbno + ltdiff) {
|
||||
xfs_btree_del_cursor(
|
||||
bno_cur_gt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_gt = NULL;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If we reach a big enough entry,
|
||||
* compare the two and pick the best.
|
||||
*/
|
||||
if (gtlena >= args->minlen) {
|
||||
args->len =
|
||||
XFS_EXTLEN_MIN(gtlena,
|
||||
args->maxlen);
|
||||
xfs_alloc_fix_len(args);
|
||||
rlen = args->len;
|
||||
gtdiff = xfs_alloc_compute_diff(
|
||||
args->agbno, rlen,
|
||||
args->alignment,
|
||||
gtbno, gtlen, &gtnew);
|
||||
/*
|
||||
* Right side is better.
|
||||
*/
|
||||
if (gtdiff < ltdiff) {
|
||||
xfs_btree_del_cursor(
|
||||
bno_cur_lt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_lt = NULL;
|
||||
}
|
||||
/*
|
||||
* Left side is better.
|
||||
*/
|
||||
else {
|
||||
xfs_btree_del_cursor(
|
||||
bno_cur_gt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_gt = NULL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* Fell off the right end.
|
||||
*/
|
||||
if ((error = xfs_btree_increment(
|
||||
bno_cur_gt, 0, &i)))
|
||||
goto error0;
|
||||
if (!i) {
|
||||
xfs_btree_del_cursor(
|
||||
bno_cur_gt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_gt = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* The left side is perfect, trash the right side.
|
||||
*/
|
||||
else {
|
||||
xfs_btree_del_cursor(bno_cur_gt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_gt = NULL;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* It's the right side that was found first, look left.
|
||||
*/
|
||||
else {
|
||||
/*
|
||||
* Fix up the length.
|
||||
* Right side is good, look for a left side entry.
|
||||
*/
|
||||
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
|
||||
xfs_alloc_fix_len(args);
|
||||
rlen = args->len;
|
||||
gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
|
||||
gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
||||
args->alignment, gtbno, gtlen, &gtnew);
|
||||
/*
|
||||
* Right side entry isn't perfect.
|
||||
*/
|
||||
if (gtdiff) {
|
||||
/*
|
||||
* Look until we find a better one, run out of
|
||||
* space, or run off the end.
|
||||
*/
|
||||
while (bno_cur_lt && bno_cur_gt) {
|
||||
if ((error = xfs_alloc_get_rec(
|
||||
bno_cur_lt, &ltbno,
|
||||
&ltlen, &i)))
|
||||
goto error0;
|
||||
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
|
||||
xfs_alloc_compute_aligned(ltbno, ltlen,
|
||||
args->alignment, args->minlen,
|
||||
&ltbnoa, &ltlena);
|
||||
/*
|
||||
* The right one is clearly better.
|
||||
*/
|
||||
if (ltbnoa <= args->agbno - gtdiff) {
|
||||
xfs_btree_del_cursor(
|
||||
bno_cur_lt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_lt = NULL;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If we reach a big enough entry,
|
||||
* compare the two and pick the best.
|
||||
*/
|
||||
if (ltlena >= args->minlen) {
|
||||
args->len = XFS_EXTLEN_MIN(
|
||||
ltlena, args->maxlen);
|
||||
xfs_alloc_fix_len(args);
|
||||
rlen = args->len;
|
||||
ltdiff = xfs_alloc_compute_diff(
|
||||
args->agbno, rlen,
|
||||
args->alignment,
|
||||
ltbno, ltlen, &ltnew);
|
||||
/*
|
||||
* Left side is better.
|
||||
*/
|
||||
if (ltdiff < gtdiff) {
|
||||
xfs_btree_del_cursor(
|
||||
bno_cur_gt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_gt = NULL;
|
||||
}
|
||||
/*
|
||||
* Right side is better.
|
||||
*/
|
||||
else {
|
||||
xfs_btree_del_cursor(
|
||||
bno_cur_lt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_lt = NULL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* Fell off the left end.
|
||||
*/
|
||||
if ((error = xfs_btree_decrement(
|
||||
bno_cur_lt, 0, &i)))
|
||||
goto error0;
|
||||
if (!i) {
|
||||
xfs_btree_del_cursor(bno_cur_lt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_lt = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* The right side is perfect, trash the left side.
|
||||
*/
|
||||
else {
|
||||
xfs_btree_del_cursor(bno_cur_lt,
|
||||
XFS_BTREE_NOERROR);
|
||||
bno_cur_lt = NULL;
|
||||
}
|
||||
|
||||
error = xfs_alloc_find_best_extent(args,
|
||||
&bno_cur_gt, &bno_cur_lt,
|
||||
gtdiff, &ltbno, &ltlen, &ltlena,
|
||||
1 /* search left */);
|
||||
}
|
||||
|
||||
if (error)
|
||||
goto error0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we couldn't get anything, give up.
|
||||
*/
|
||||
|
@ -1130,6 +1067,7 @@ xfs_alloc_ag_vextent_near(
|
|||
args->agbno = NULLAGBLOCK;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* At this point we have selected a freespace entry, either to the
|
||||
* left or to the right. If it's on the right, copy all the
|
||||
|
@ -1146,6 +1084,7 @@ xfs_alloc_ag_vextent_near(
|
|||
j = 1;
|
||||
} else
|
||||
j = 0;
|
||||
|
||||
/*
|
||||
* Fix up the length and compute the useful address.
|
||||
*/
|
||||
|
|
|
@ -637,7 +637,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
|
|||
* It didn't all fit, so we have to sort everything on hashval.
|
||||
*/
|
||||
sbsize = sf->hdr.count * sizeof(*sbuf);
|
||||
sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
|
||||
sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
|
||||
|
||||
/*
|
||||
* Scan the attribute list for the rest of the entries, storing
|
||||
|
@ -2386,7 +2386,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
|
|||
args.dp = context->dp;
|
||||
args.whichfork = XFS_ATTR_FORK;
|
||||
args.valuelen = valuelen;
|
||||
args.value = kmem_alloc(valuelen, KM_SLEEP);
|
||||
args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
|
||||
args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
|
||||
args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
|
||||
retval = xfs_attr_rmtval_get(&args);
|
||||
|
|
|
@ -634,9 +634,8 @@ xfs_btree_read_bufl(
|
|||
return error;
|
||||
}
|
||||
ASSERT(!bp || !XFS_BUF_GETERROR(bp));
|
||||
if (bp != NULL) {
|
||||
if (bp)
|
||||
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
|
||||
}
|
||||
*bpp = bp;
|
||||
return 0;
|
||||
}
|
||||
|
@ -944,13 +943,13 @@ xfs_btree_set_refs(
|
|||
switch (cur->bc_btnum) {
|
||||
case XFS_BTNUM_BNO:
|
||||
case XFS_BTNUM_CNT:
|
||||
XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
|
||||
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
|
||||
break;
|
||||
case XFS_BTNUM_INO:
|
||||
XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
|
||||
XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
|
||||
break;
|
||||
case XFS_BTNUM_BMAP:
|
||||
XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
|
||||
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
|
|
|
@ -142,7 +142,7 @@ xfs_buf_item_log_check(
|
|||
#endif
|
||||
|
||||
STATIC void xfs_buf_error_relse(xfs_buf_t *bp);
|
||||
STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
|
||||
STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
|
||||
|
||||
/*
|
||||
* This returns the number of log iovecs needed to log the
|
||||
|
@ -450,7 +450,7 @@ xfs_buf_item_unpin(
|
|||
* xfs_trans_ail_delete() drops the AIL lock.
|
||||
*/
|
||||
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
|
||||
xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
|
||||
xfs_buf_do_callbacks(bp);
|
||||
XFS_BUF_SET_FSPRIVATE(bp, NULL);
|
||||
XFS_BUF_CLR_IODONE_FUNC(bp);
|
||||
} else {
|
||||
|
@ -918,15 +918,26 @@ xfs_buf_attach_iodone(
|
|||
XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
|
||||
}
|
||||
|
||||
/*
|
||||
* We can have many callbacks on a buffer. Running the callbacks individually
|
||||
* can cause a lot of contention on the AIL lock, so we allow for a single
|
||||
* callback to be able to scan the remaining lip->li_bio_list for other items
|
||||
* of the same type and callback to be processed in the first call.
|
||||
*
|
||||
* As a result, the loop walking the callback list below will also modify the
|
||||
* list. It removes the first item from the list and then runs the callback.
|
||||
* The loop then restarts from the new head of the list. This allows the
|
||||
* callback to scan and modify the list attached to the buffer and we don't
|
||||
* have to care about maintaining a next item pointer.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_buf_do_callbacks(
|
||||
xfs_buf_t *bp,
|
||||
xfs_log_item_t *lip)
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
xfs_log_item_t *nlip;
|
||||
struct xfs_log_item *lip;
|
||||
|
||||
while (lip != NULL) {
|
||||
nlip = lip->li_bio_list;
|
||||
while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
|
||||
XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
|
||||
ASSERT(lip->li_cb != NULL);
|
||||
/*
|
||||
* Clear the next pointer so we don't have any
|
||||
|
@ -936,7 +947,6 @@ xfs_buf_do_callbacks(
|
|||
*/
|
||||
lip->li_bio_list = NULL;
|
||||
lip->li_cb(bp, lip);
|
||||
lip = nlip;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -970,7 +980,7 @@ xfs_buf_iodone_callbacks(
|
|||
ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
|
||||
XFS_BUF_SUPER_STALE(bp);
|
||||
trace_xfs_buf_item_iodone(bp, _RET_IP_);
|
||||
xfs_buf_do_callbacks(bp, lip);
|
||||
xfs_buf_do_callbacks(bp);
|
||||
XFS_BUF_SET_FSPRIVATE(bp, NULL);
|
||||
XFS_BUF_CLR_IODONE_FUNC(bp);
|
||||
xfs_buf_ioend(bp, 0);
|
||||
|
@ -1029,7 +1039,7 @@ xfs_buf_iodone_callbacks(
|
|||
return;
|
||||
}
|
||||
|
||||
xfs_buf_do_callbacks(bp, lip);
|
||||
xfs_buf_do_callbacks(bp);
|
||||
XFS_BUF_SET_FSPRIVATE(bp, NULL);
|
||||
XFS_BUF_CLR_IODONE_FUNC(bp);
|
||||
xfs_buf_ioend(bp, 0);
|
||||
|
@ -1063,7 +1073,7 @@ xfs_buf_error_relse(
|
|||
* We have to unpin the pinned buffers so do the
|
||||
* callbacks.
|
||||
*/
|
||||
xfs_buf_do_callbacks(bp, lip);
|
||||
xfs_buf_do_callbacks(bp);
|
||||
XFS_BUF_SET_FSPRIVATE(bp, NULL);
|
||||
XFS_BUF_CLR_IODONE_FUNC(bp);
|
||||
XFS_BUF_SET_BRELSE_FUNC(bp,NULL);
|
||||
|
|
|
@ -105,17 +105,6 @@ typedef struct xfs_buf_log_item {
|
|||
xfs_buf_log_format_t bli_format; /* in-log header */
|
||||
} xfs_buf_log_item_t;
|
||||
|
||||
/*
|
||||
* This structure is used during recovery to record the buf log
|
||||
* items which have been canceled and should not be replayed.
|
||||
*/
|
||||
typedef struct xfs_buf_cancel {
|
||||
xfs_daddr_t bc_blkno;
|
||||
uint bc_len;
|
||||
int bc_refcount;
|
||||
struct xfs_buf_cancel *bc_next;
|
||||
} xfs_buf_cancel_t;
|
||||
|
||||
void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
|
||||
void xfs_buf_item_relse(struct xfs_buf *);
|
||||
void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
|
||||
|
|
|
@ -47,6 +47,28 @@ xfs_efi_item_free(
|
|||
kmem_zone_free(xfs_efi_zone, efip);
|
||||
}
|
||||
|
||||
/*
|
||||
* Freeing the efi requires that we remove it from the AIL if it has already
|
||||
* been placed there. However, the EFI may not yet have been placed in the AIL
|
||||
* when called by xfs_efi_release() from EFD processing due to the ordering of
|
||||
* committed vs unpin operations in bulk insert operations. Hence the
|
||||
* test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees
|
||||
* the EFI.
|
||||
*/
|
||||
STATIC void
|
||||
__xfs_efi_release(
|
||||
struct xfs_efi_log_item *efip)
|
||||
{
|
||||
struct xfs_ail *ailp = efip->efi_item.li_ailp;
|
||||
|
||||
if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) {
|
||||
spin_lock(&ailp->xa_lock);
|
||||
/* xfs_trans_ail_delete() drops the AIL lock. */
|
||||
xfs_trans_ail_delete(ailp, &efip->efi_item);
|
||||
xfs_efi_item_free(efip);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This returns the number of iovecs needed to log the given efi item.
|
||||
* We only need 1 iovec for an efi item. It just logs the efi_log_format
|
||||
|
@ -74,7 +96,8 @@ xfs_efi_item_format(
|
|||
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
|
||||
uint size;
|
||||
|
||||
ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents);
|
||||
ASSERT(atomic_read(&efip->efi_next_extent) ==
|
||||
efip->efi_format.efi_nextents);
|
||||
|
||||
efip->efi_format.efi_type = XFS_LI_EFI;
|
||||
|
||||
|
@ -99,10 +122,12 @@ xfs_efi_item_pin(
|
|||
}
|
||||
|
||||
/*
|
||||
* While EFIs cannot really be pinned, the unpin operation is the
|
||||
* last place at which the EFI is manipulated during a transaction.
|
||||
* Here we coordinate with xfs_efi_cancel() to determine who gets to
|
||||
* free the EFI.
|
||||
* While EFIs cannot really be pinned, the unpin operation is the last place at
|
||||
* which the EFI is manipulated during a transaction. If we are being asked to
|
||||
* remove the EFI it's because the transaction has been cancelled and by
|
||||
* definition that means the EFI cannot be in the AIL so remove it from the
|
||||
* transaction and free it. Otherwise coordinate with xfs_efi_release() (via
|
||||
* XFS_EFI_COMMITTED) to determine who gets to free the EFI.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_efi_item_unpin(
|
||||
|
@ -110,20 +135,14 @@ xfs_efi_item_unpin(
|
|||
int remove)
|
||||
{
|
||||
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
|
||||
struct xfs_ail *ailp = lip->li_ailp;
|
||||
|
||||
spin_lock(&ailp->xa_lock);
|
||||
if (efip->efi_flags & XFS_EFI_CANCELED) {
|
||||
if (remove)
|
||||
xfs_trans_del_item(lip);
|
||||
|
||||
/* xfs_trans_ail_delete() drops the AIL lock. */
|
||||
xfs_trans_ail_delete(ailp, lip);
|
||||
if (remove) {
|
||||
ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
|
||||
xfs_trans_del_item(lip);
|
||||
xfs_efi_item_free(efip);
|
||||
} else {
|
||||
efip->efi_flags |= XFS_EFI_COMMITTED;
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
return;
|
||||
}
|
||||
__xfs_efi_release(efip);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -152,16 +171,20 @@ xfs_efi_item_unlock(
|
|||
}
|
||||
|
||||
/*
|
||||
* The EFI is logged only once and cannot be moved in the log, so
|
||||
* simply return the lsn at which it's been logged. The canceled
|
||||
* flag is not paid any attention here. Checking for that is delayed
|
||||
* until the EFI is unpinned.
|
||||
* The EFI is logged only once and cannot be moved in the log, so simply return
|
||||
* the lsn at which it's been logged. For bulk transaction committed
|
||||
* processing, the EFI may be processed but not yet unpinned prior to the EFD
|
||||
* being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected
|
||||
* when processing the EFD.
|
||||
*/
|
||||
STATIC xfs_lsn_t
|
||||
xfs_efi_item_committed(
|
||||
struct xfs_log_item *lip,
|
||||
xfs_lsn_t lsn)
|
||||
{
|
||||
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
|
||||
|
||||
set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);
|
||||
return lsn;
|
||||
}
|
||||
|
||||
|
@ -230,6 +253,7 @@ xfs_efi_init(
|
|||
xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
|
||||
efip->efi_format.efi_nextents = nextents;
|
||||
efip->efi_format.efi_id = (__psint_t)(void*)efip;
|
||||
atomic_set(&efip->efi_next_extent, 0);
|
||||
|
||||
return efip;
|
||||
}
|
||||
|
@ -289,37 +313,18 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
|
|||
}
|
||||
|
||||
/*
|
||||
* This is called by the efd item code below to release references to
|
||||
* the given efi item. Each efd calls this with the number of
|
||||
* extents that it has logged, and when the sum of these reaches
|
||||
* the total number of extents logged by this efi item we can free
|
||||
* the efi item.
|
||||
*
|
||||
* Freeing the efi item requires that we remove it from the AIL.
|
||||
* We'll use the AIL lock to protect our counters as well as
|
||||
* the removal from the AIL.
|
||||
* This is called by the efd item code below to release references to the given
|
||||
* efi item. Each efd calls this with the number of extents that it has
|
||||
* logged, and when the sum of these reaches the total number of extents logged
|
||||
* by this efi item we can free the efi item.
|
||||
*/
|
||||
void
|
||||
xfs_efi_release(xfs_efi_log_item_t *efip,
|
||||
uint nextents)
|
||||
{
|
||||
struct xfs_ail *ailp = efip->efi_item.li_ailp;
|
||||
int extents_left;
|
||||
|
||||
ASSERT(efip->efi_next_extent > 0);
|
||||
ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
|
||||
|
||||
spin_lock(&ailp->xa_lock);
|
||||
ASSERT(efip->efi_next_extent >= nextents);
|
||||
efip->efi_next_extent -= nextents;
|
||||
extents_left = efip->efi_next_extent;
|
||||
if (extents_left == 0) {
|
||||
/* xfs_trans_ail_delete() drops the AIL lock. */
|
||||
xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
|
||||
xfs_efi_item_free(efip);
|
||||
} else {
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
|
||||
if (atomic_sub_and_test(nextents, &efip->efi_next_extent))
|
||||
__xfs_efi_release(efip);
|
||||
}
|
||||
|
||||
static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
|
||||
|
|
|
@ -111,11 +111,10 @@ typedef struct xfs_efd_log_format_64 {
|
|||
#define XFS_EFI_MAX_FAST_EXTENTS 16
|
||||
|
||||
/*
|
||||
* Define EFI flags.
|
||||
* Define EFI flag bits. Manipulated by set/clear/test_bit operators.
|
||||
*/
|
||||
#define XFS_EFI_RECOVERED 0x1
|
||||
#define XFS_EFI_COMMITTED 0x2
|
||||
#define XFS_EFI_CANCELED 0x4
|
||||
#define XFS_EFI_RECOVERED 1
|
||||
#define XFS_EFI_COMMITTED 2
|
||||
|
||||
/*
|
||||
* This is the "extent free intention" log item. It is used
|
||||
|
@ -125,8 +124,8 @@ typedef struct xfs_efd_log_format_64 {
|
|||
*/
|
||||
typedef struct xfs_efi_log_item {
|
||||
xfs_log_item_t efi_item;
|
||||
uint efi_flags; /* misc flags */
|
||||
uint efi_next_extent;
|
||||
atomic_t efi_next_extent;
|
||||
unsigned long efi_flags; /* misc flags */
|
||||
xfs_efi_log_format_t efi_format;
|
||||
} xfs_efi_log_item_t;
|
||||
|
||||
|
|
|
@ -374,6 +374,7 @@ xfs_growfs_data_private(
|
|||
mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
|
||||
} else
|
||||
mp->m_maxicount = 0;
|
||||
xfs_set_low_space_thresholds(mp);
|
||||
|
||||
/* update secondary superblocks. */
|
||||
for (agno = 1; agno < nagcount; agno++) {
|
||||
|
|
|
@ -42,6 +42,17 @@
|
|||
#include "xfs_trace.h"
|
||||
|
||||
|
||||
/*
|
||||
* Define xfs inode iolock lockdep classes. We need to ensure that all active
|
||||
* inodes are considered the same for lockdep purposes, including inodes that
|
||||
* are recycled through the XFS_IRECLAIMABLE state. This is the only way to
|
||||
* guarantee the locks are considered the same when there are multiple lock
|
||||
* initialisation sites. Also, define a reclaimable inode class so it is
|
||||
* obvious in lockdep reports which class the report is against.
|
||||
*/
|
||||
static struct lock_class_key xfs_iolock_active;
|
||||
struct lock_class_key xfs_iolock_reclaimable;
|
||||
|
||||
/*
|
||||
* Allocate and initialise an xfs_inode.
|
||||
*/
|
||||
|
@ -69,8 +80,11 @@ xfs_inode_alloc(
|
|||
ASSERT(atomic_read(&ip->i_pincount) == 0);
|
||||
ASSERT(!spin_is_locked(&ip->i_flags_lock));
|
||||
ASSERT(completion_done(&ip->i_flush));
|
||||
ASSERT(ip->i_ino == 0);
|
||||
|
||||
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
|
||||
lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
|
||||
&xfs_iolock_active, "xfs_iolock_active");
|
||||
|
||||
/* initialise the xfs inode */
|
||||
ip->i_ino = ino;
|
||||
|
@ -85,9 +99,6 @@ xfs_inode_alloc(
|
|||
ip->i_size = 0;
|
||||
ip->i_new_size = 0;
|
||||
|
||||
/* prevent anyone from using this yet */
|
||||
VFS_I(ip)->i_state = I_NEW;
|
||||
|
||||
return ip;
|
||||
}
|
||||
|
||||
|
@ -145,7 +156,18 @@ xfs_inode_free(
|
|||
ASSERT(!spin_is_locked(&ip->i_flags_lock));
|
||||
ASSERT(completion_done(&ip->i_flush));
|
||||
|
||||
call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback);
|
||||
/*
|
||||
* Because we use RCU freeing we need to ensure the inode always
|
||||
* appears to be reclaimed with an invalid inode number when in the
|
||||
* free state. The ip->i_flags_lock provides the barrier against lookup
|
||||
* races.
|
||||
*/
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
ip->i_flags = XFS_IRECLAIM;
|
||||
ip->i_ino = 0;
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
|
||||
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -155,14 +177,29 @@ static int
|
|||
xfs_iget_cache_hit(
|
||||
struct xfs_perag *pag,
|
||||
struct xfs_inode *ip,
|
||||
xfs_ino_t ino,
|
||||
int flags,
|
||||
int lock_flags) __releases(pag->pag_ici_lock)
|
||||
int lock_flags) __releases(RCU)
|
||||
{
|
||||
struct inode *inode = VFS_I(ip);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* check for re-use of an inode within an RCU grace period due to the
|
||||
* radix tree nodes not being updated yet. We monitor for this by
|
||||
* setting the inode number to zero before freeing the inode structure.
|
||||
* If the inode has been reallocated and set up, then the inode number
|
||||
* will not match, so check for that, too.
|
||||
*/
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
if (ip->i_ino != ino) {
|
||||
trace_xfs_iget_skip(ip);
|
||||
XFS_STATS_INC(xs_ig_frecycle);
|
||||
error = EAGAIN;
|
||||
goto out_error;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* If we are racing with another cache hit that is currently
|
||||
|
@ -205,7 +242,7 @@ xfs_iget_cache_hit(
|
|||
ip->i_flags |= XFS_IRECLAIM;
|
||||
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
error = -inode_init_always(mp->m_super, inode);
|
||||
if (error) {
|
||||
|
@ -213,7 +250,7 @@ xfs_iget_cache_hit(
|
|||
* Re-initializing the inode failed, and we are in deep
|
||||
* trouble. Try to re-add it to the reclaim list.
|
||||
*/
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
rcu_read_lock();
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
|
||||
ip->i_flags &= ~XFS_INEW;
|
||||
|
@ -223,14 +260,20 @@ xfs_iget_cache_hit(
|
|||
goto out_error;
|
||||
}
|
||||
|
||||
write_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
|
||||
ip->i_flags |= XFS_INEW;
|
||||
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
|
||||
inode->i_state = I_NEW;
|
||||
|
||||
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
|
||||
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
|
||||
lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
|
||||
&xfs_iolock_active, "xfs_iolock_active");
|
||||
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
spin_unlock(&pag->pag_ici_lock);
|
||||
} else {
|
||||
/* If the VFS inode is being torn down, pause and try again. */
|
||||
if (!igrab(inode)) {
|
||||
|
@ -241,7 +284,7 @@ xfs_iget_cache_hit(
|
|||
|
||||
/* We've got a live one. */
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
trace_xfs_iget_hit(ip);
|
||||
}
|
||||
|
||||
|
@ -255,7 +298,7 @@ xfs_iget_cache_hit(
|
|||
|
||||
out_error:
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -308,7 +351,7 @@ xfs_iget_cache_miss(
|
|||
BUG();
|
||||
}
|
||||
|
||||
write_lock(&pag->pag_ici_lock);
|
||||
spin_lock(&pag->pag_ici_lock);
|
||||
|
||||
/* insert the new inode */
|
||||
error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
|
||||
|
@ -323,14 +366,14 @@ xfs_iget_cache_miss(
|
|||
ip->i_udquot = ip->i_gdquot = NULL;
|
||||
xfs_iflags_set(ip, XFS_INEW);
|
||||
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
spin_unlock(&pag->pag_ici_lock);
|
||||
radix_tree_preload_end();
|
||||
|
||||
*ipp = ip;
|
||||
return 0;
|
||||
|
||||
out_preload_end:
|
||||
write_unlock(&pag->pag_ici_lock);
|
||||
spin_unlock(&pag->pag_ici_lock);
|
||||
radix_tree_preload_end();
|
||||
if (lock_flags)
|
||||
xfs_iunlock(ip, lock_flags);
|
||||
|
@ -377,7 +420,7 @@ xfs_iget(
|
|||
xfs_agino_t agino;
|
||||
|
||||
/* reject inode numbers outside existing AGs */
|
||||
if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
|
||||
if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
|
||||
return EINVAL;
|
||||
|
||||
/* get the perag structure and ensure that it's inode capable */
|
||||
|
@ -386,15 +429,15 @@ xfs_iget(
|
|||
|
||||
again:
|
||||
error = 0;
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
rcu_read_lock();
|
||||
ip = radix_tree_lookup(&pag->pag_ici_root, agino);
|
||||
|
||||
if (ip) {
|
||||
error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
|
||||
error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
|
||||
if (error)
|
||||
goto out_error_or_again;
|
||||
} else {
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
XFS_STATS_INC(xs_ig_missed);
|
||||
|
||||
error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
|
||||
|
|
|
@ -887,7 +887,7 @@ xfs_iread(
|
|||
* around for a while. This helps to keep recently accessed
|
||||
* meta-data in-core longer.
|
||||
*/
|
||||
XFS_BUF_SET_REF(bp, XFS_INO_REF);
|
||||
xfs_buf_set_ref(bp, XFS_INO_REF);
|
||||
|
||||
/*
|
||||
* Use xfs_trans_brelse() to release the buffer containing the
|
||||
|
@ -2000,16 +2000,32 @@ xfs_ifree_cluster(
|
|||
*/
|
||||
for (i = 0; i < ninodes; i++) {
|
||||
retry:
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
rcu_read_lock();
|
||||
ip = radix_tree_lookup(&pag->pag_ici_root,
|
||||
XFS_INO_TO_AGINO(mp, (inum + i)));
|
||||
|
||||
/* Inode not in memory or stale, nothing to do */
|
||||
if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
/* Inode not in memory, nothing to do */
|
||||
if (!ip) {
|
||||
rcu_read_unlock();
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* because this is an RCU protected lookup, we could
|
||||
* find a recently freed or even reallocated inode
|
||||
* during the lookup. We need to check under the
|
||||
* i_flags_lock for a valid inode here. Skip it if it
|
||||
* is not valid, the wrong inode or stale.
|
||||
*/
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
if (ip->i_ino != inum + i ||
|
||||
__xfs_iflags_test(ip, XFS_ISTALE)) {
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
rcu_read_unlock();
|
||||
continue;
|
||||
}
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
|
||||
/*
|
||||
* Don't try to lock/unlock the current inode, but we
|
||||
* _cannot_ skip the other inodes that we did not find
|
||||
|
@ -2019,11 +2035,11 @@ xfs_ifree_cluster(
|
|||
*/
|
||||
if (ip != free_ip &&
|
||||
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
delay(1);
|
||||
goto retry;
|
||||
}
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
xfs_iflock(ip);
|
||||
xfs_iflags_set(ip, XFS_ISTALE);
|
||||
|
@ -2629,7 +2645,7 @@ xfs_iflush_cluster(
|
|||
|
||||
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
|
||||
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
rcu_read_lock();
|
||||
/* really need a gang lookup range call here */
|
||||
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
|
||||
first_index, inodes_per_cluster);
|
||||
|
@ -2640,9 +2656,21 @@ xfs_iflush_cluster(
|
|||
iq = ilist[i];
|
||||
if (iq == ip)
|
||||
continue;
|
||||
/* if the inode lies outside this cluster, we're done. */
|
||||
if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
|
||||
break;
|
||||
|
||||
/*
|
||||
* because this is an RCU protected lookup, we could find a
|
||||
* recently freed or even reallocated inode during the lookup.
|
||||
* We need to check under the i_flags_lock for a valid inode
|
||||
* here. Skip it if it is not valid or the wrong inode.
|
||||
*/
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
if (!ip->i_ino ||
|
||||
(XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
continue;
|
||||
}
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
|
||||
/*
|
||||
* Do an un-protected check to see if the inode is dirty and
|
||||
* is a candidate for flushing. These checks will be repeated
|
||||
|
@ -2692,7 +2720,7 @@ xfs_iflush_cluster(
|
|||
}
|
||||
|
||||
out_free:
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
kmem_free(ilist);
|
||||
out_put:
|
||||
xfs_perag_put(pag);
|
||||
|
@ -2704,7 +2732,7 @@ xfs_iflush_cluster(
|
|||
* Corruption detected in the clustering loop. Invalidate the
|
||||
* inode buffer and shut down the filesystem.
|
||||
*/
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
rcu_read_unlock();
|
||||
/*
|
||||
* Clean up the buffer. If it was B_DELWRI, just release it --
|
||||
* brelse can handle it with no problems. If not, shut down the
|
||||
|
|
|
@ -376,12 +376,13 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
|
|||
/*
|
||||
* In-core inode flags.
|
||||
*/
|
||||
#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */
|
||||
#define XFS_ISTALE 0x0002 /* inode has been staled */
|
||||
#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
|
||||
#define XFS_INEW 0x0008 /* inode has just been allocated */
|
||||
#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
|
||||
#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
|
||||
#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
|
||||
#define XFS_ISTALE 0x0002 /* inode has been staled */
|
||||
#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
|
||||
#define XFS_INEW 0x0008 /* inode has just been allocated */
|
||||
#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
|
||||
#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
|
||||
#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
|
||||
|
||||
/*
|
||||
* Flags for inode locking.
|
||||
|
@ -438,6 +439,8 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
|
|||
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
|
||||
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
|
||||
|
||||
extern struct lock_class_key xfs_iolock_reclaimable;
|
||||
|
||||
/*
|
||||
* Flags for xfs_itruncate_start().
|
||||
*/
|
||||
|
|
|
@ -842,15 +842,64 @@ xfs_inode_item_destroy(
|
|||
* flushed to disk. It is responsible for removing the inode item
|
||||
* from the AIL if it has not been re-logged, and unlocking the inode's
|
||||
* flush lock.
|
||||
*
|
||||
* To reduce AIL lock traffic as much as possible, we scan the buffer log item
|
||||
* list for other inodes that will run this function. We remove them from the
|
||||
* buffer list so we can process all the inode IO completions in one AIL lock
|
||||
* traversal.
|
||||
*/
|
||||
void
|
||||
xfs_iflush_done(
|
||||
struct xfs_buf *bp,
|
||||
struct xfs_log_item *lip)
|
||||
{
|
||||
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
|
||||
xfs_inode_t *ip = iip->ili_inode;
|
||||
struct xfs_inode_log_item *iip;
|
||||
struct xfs_log_item *blip;
|
||||
struct xfs_log_item *next;
|
||||
struct xfs_log_item *prev;
|
||||
struct xfs_ail *ailp = lip->li_ailp;
|
||||
int need_ail = 0;
|
||||
|
||||
/*
|
||||
* Scan the buffer IO completions for other inodes being completed and
|
||||
* attach them to the current inode log item.
|
||||
*/
|
||||
blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
|
||||
prev = NULL;
|
||||
while (blip != NULL) {
|
||||
if (lip->li_cb != xfs_iflush_done) {
|
||||
prev = blip;
|
||||
blip = blip->li_bio_list;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* remove from list */
|
||||
next = blip->li_bio_list;
|
||||
if (!prev) {
|
||||
XFS_BUF_SET_FSPRIVATE(bp, next);
|
||||
} else {
|
||||
prev->li_bio_list = next;
|
||||
}
|
||||
|
||||
/* add to current list */
|
||||
blip->li_bio_list = lip->li_bio_list;
|
||||
lip->li_bio_list = blip;
|
||||
|
||||
/*
|
||||
* while we have the item, do the unlocked check for needing
|
||||
* the AIL lock.
|
||||
*/
|
||||
iip = INODE_ITEM(blip);
|
||||
if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
|
||||
need_ail++;
|
||||
|
||||
blip = next;
|
||||
}
|
||||
|
||||
/* make sure we capture the state of the initial inode. */
|
||||
iip = INODE_ITEM(lip);
|
||||
if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
|
||||
need_ail++;
|
||||
|
||||
/*
|
||||
* We only want to pull the item from the AIL if it is
|
||||
|
@ -861,28 +910,37 @@ xfs_iflush_done(
|
|||
* the lock since it's cheaper, and then we recheck while
|
||||
* holding the lock before removing the inode from the AIL.
|
||||
*/
|
||||
if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) {
|
||||
if (need_ail) {
|
||||
struct xfs_log_item *log_items[need_ail];
|
||||
int i = 0;
|
||||
spin_lock(&ailp->xa_lock);
|
||||
if (lip->li_lsn == iip->ili_flush_lsn) {
|
||||
/* xfs_trans_ail_delete() drops the AIL lock. */
|
||||
xfs_trans_ail_delete(ailp, lip);
|
||||
} else {
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
for (blip = lip; blip; blip = blip->li_bio_list) {
|
||||
iip = INODE_ITEM(blip);
|
||||
if (iip->ili_logged &&
|
||||
blip->li_lsn == iip->ili_flush_lsn) {
|
||||
log_items[i++] = blip;
|
||||
}
|
||||
ASSERT(i <= need_ail);
|
||||
}
|
||||
/* xfs_trans_ail_delete_bulk() drops the AIL lock. */
|
||||
xfs_trans_ail_delete_bulk(ailp, log_items, i);
|
||||
}
|
||||
|
||||
iip->ili_logged = 0;
|
||||
|
||||
/*
|
||||
* Clear the ili_last_fields bits now that we know that the
|
||||
* data corresponding to them is safely on disk.
|
||||
* clean up and unlock the flush lock now we are done. We can clear the
|
||||
* ili_last_fields bits now that we know that the data corresponding to
|
||||
* them is safely on disk.
|
||||
*/
|
||||
iip->ili_last_fields = 0;
|
||||
for (blip = lip; blip; blip = next) {
|
||||
next = blip->li_bio_list;
|
||||
blip->li_bio_list = NULL;
|
||||
|
||||
/*
|
||||
* Release the inode's flush lock since we're done with it.
|
||||
*/
|
||||
xfs_ifunlock(ip);
|
||||
iip = INODE_ITEM(blip);
|
||||
iip->ili_logged = 0;
|
||||
iip->ili_last_fields = 0;
|
||||
xfs_ifunlock(iip->ili_inode);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -47,127 +47,8 @@
|
|||
|
||||
#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
|
||||
<< mp->m_writeio_log)
|
||||
#define XFS_STRAT_WRITE_IMAPS 2
|
||||
#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP
|
||||
|
||||
STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
|
||||
int, struct xfs_bmbt_irec *, int *);
|
||||
STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
|
||||
struct xfs_bmbt_irec *, int *);
|
||||
STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
|
||||
struct xfs_bmbt_irec *, int *);
|
||||
|
||||
int
|
||||
xfs_iomap(
|
||||
struct xfs_inode *ip,
|
||||
xfs_off_t offset,
|
||||
ssize_t count,
|
||||
int flags,
|
||||
struct xfs_bmbt_irec *imap,
|
||||
int *nimaps,
|
||||
int *new)
|
||||
{
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
xfs_fileoff_t offset_fsb, end_fsb;
|
||||
int error = 0;
|
||||
int lockmode = 0;
|
||||
int bmapi_flags = 0;
|
||||
|
||||
ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
|
||||
|
||||
*new = 0;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return XFS_ERROR(EIO);
|
||||
|
||||
trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
|
||||
|
||||
switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
|
||||
case BMAPI_READ:
|
||||
lockmode = xfs_ilock_map_shared(ip);
|
||||
bmapi_flags = XFS_BMAPI_ENTIRE;
|
||||
break;
|
||||
case BMAPI_WRITE:
|
||||
lockmode = XFS_ILOCK_EXCL;
|
||||
if (flags & BMAPI_IGNSTATE)
|
||||
bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
|
||||
xfs_ilock(ip, lockmode);
|
||||
break;
|
||||
case BMAPI_ALLOCATE:
|
||||
lockmode = XFS_ILOCK_SHARED;
|
||||
bmapi_flags = XFS_BMAPI_ENTIRE;
|
||||
|
||||
/* Attempt non-blocking lock */
|
||||
if (flags & BMAPI_TRYLOCK) {
|
||||
if (!xfs_ilock_nowait(ip, lockmode))
|
||||
return XFS_ERROR(EAGAIN);
|
||||
} else {
|
||||
xfs_ilock(ip, lockmode);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
ASSERT(offset <= mp->m_maxioffset);
|
||||
if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
|
||||
count = mp->m_maxioffset - offset;
|
||||
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
|
||||
offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
||||
|
||||
error = xfs_bmapi(NULL, ip, offset_fsb,
|
||||
(xfs_filblks_t)(end_fsb - offset_fsb),
|
||||
bmapi_flags, NULL, 0, imap,
|
||||
nimaps, NULL);
|
||||
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
|
||||
case BMAPI_WRITE:
|
||||
/* If we found an extent, return it */
|
||||
if (*nimaps &&
|
||||
(imap->br_startblock != HOLESTARTBLOCK) &&
|
||||
(imap->br_startblock != DELAYSTARTBLOCK)) {
|
||||
trace_xfs_iomap_found(ip, offset, count, flags, imap);
|
||||
break;
|
||||
}
|
||||
|
||||
if (flags & BMAPI_DIRECT) {
|
||||
error = xfs_iomap_write_direct(ip, offset, count, flags,
|
||||
imap, nimaps);
|
||||
} else {
|
||||
error = xfs_iomap_write_delay(ip, offset, count, flags,
|
||||
imap, nimaps);
|
||||
}
|
||||
if (!error) {
|
||||
trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
|
||||
}
|
||||
*new = 1;
|
||||
break;
|
||||
case BMAPI_ALLOCATE:
|
||||
/* If we found an extent, return it */
|
||||
xfs_iunlock(ip, lockmode);
|
||||
lockmode = 0;
|
||||
|
||||
if (*nimaps && !isnullstartblock(imap->br_startblock)) {
|
||||
trace_xfs_iomap_found(ip, offset, count, flags, imap);
|
||||
break;
|
||||
}
|
||||
|
||||
error = xfs_iomap_write_allocate(ip, offset, count,
|
||||
imap, nimaps);
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT(*nimaps <= 1);
|
||||
|
||||
out:
|
||||
if (lockmode)
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return XFS_ERROR(error);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_iomap_eof_align_last_fsb(
|
||||
xfs_mount_t *mp,
|
||||
|
@ -236,14 +117,13 @@ xfs_cmn_err_fsblock_zero(
|
|||
return EFSCORRUPTED;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
int
|
||||
xfs_iomap_write_direct(
|
||||
xfs_inode_t *ip,
|
||||
xfs_off_t offset,
|
||||
size_t count,
|
||||
int flags,
|
||||
xfs_bmbt_irec_t *imap,
|
||||
int *nmaps)
|
||||
int nmaps)
|
||||
{
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
xfs_fileoff_t offset_fsb;
|
||||
|
@ -279,7 +159,7 @@ xfs_iomap_write_direct(
|
|||
if (error)
|
||||
goto error_out;
|
||||
} else {
|
||||
if (*nmaps && (imap->br_startblock == HOLESTARTBLOCK))
|
||||
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
|
||||
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
|
||||
imap->br_blockcount +
|
||||
imap->br_startoff);
|
||||
|
@ -331,7 +211,7 @@ xfs_iomap_write_direct(
|
|||
xfs_trans_ijoin(tp, ip);
|
||||
|
||||
bmapi_flag = XFS_BMAPI_WRITE;
|
||||
if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
|
||||
if (offset < ip->i_size || extsz)
|
||||
bmapi_flag |= XFS_BMAPI_PREALLOC;
|
||||
|
||||
/*
|
||||
|
@ -370,7 +250,6 @@ xfs_iomap_write_direct(
|
|||
goto error_out;
|
||||
}
|
||||
|
||||
*nmaps = 1;
|
||||
return 0;
|
||||
|
||||
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
|
||||
|
@ -379,7 +258,6 @@ xfs_iomap_write_direct(
|
|||
|
||||
error1: /* Just cancel transaction */
|
||||
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
|
||||
*nmaps = 0; /* nothing set-up here */
|
||||
|
||||
error_out:
|
||||
return XFS_ERROR(error);
|
||||
|
@ -389,6 +267,9 @@ xfs_iomap_write_direct(
|
|||
* If the caller is doing a write at the end of the file, then extend the
|
||||
* allocation out to the file system's write iosize. We clean up any extra
|
||||
* space left over when the file is closed in xfs_inactive().
|
||||
*
|
||||
* If we find we already have delalloc preallocation beyond EOF, don't do more
|
||||
* preallocation as it is not needed.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_iomap_eof_want_preallocate(
|
||||
|
@ -396,7 +277,6 @@ xfs_iomap_eof_want_preallocate(
|
|||
xfs_inode_t *ip,
|
||||
xfs_off_t offset,
|
||||
size_t count,
|
||||
int ioflag,
|
||||
xfs_bmbt_irec_t *imap,
|
||||
int nimaps,
|
||||
int *prealloc)
|
||||
|
@ -405,6 +285,7 @@ xfs_iomap_eof_want_preallocate(
|
|||
xfs_filblks_t count_fsb;
|
||||
xfs_fsblock_t firstblock;
|
||||
int n, error, imaps;
|
||||
int found_delalloc = 0;
|
||||
|
||||
*prealloc = 0;
|
||||
if ((offset + count) <= ip->i_size)
|
||||
|
@ -429,20 +310,66 @@ xfs_iomap_eof_want_preallocate(
|
|||
return 0;
|
||||
start_fsb += imap[n].br_blockcount;
|
||||
count_fsb -= imap[n].br_blockcount;
|
||||
|
||||
if (imap[n].br_startblock == DELAYSTARTBLOCK)
|
||||
found_delalloc = 1;
|
||||
}
|
||||
}
|
||||
*prealloc = 1;
|
||||
if (!found_delalloc)
|
||||
*prealloc = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
/*
|
||||
* If we don't have a user specified preallocation size, dynamically increase
|
||||
* the preallocation size as the size of the file grows. Cap the maximum size
|
||||
* at a single extent or less if the filesystem is near full. The closer the
|
||||
* filesystem is to full, the smaller the maximum prealocation.
|
||||
*/
|
||||
STATIC xfs_fsblock_t
|
||||
xfs_iomap_prealloc_size(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
xfs_fsblock_t alloc_blocks = 0;
|
||||
|
||||
if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
|
||||
int shift = 0;
|
||||
int64_t freesp;
|
||||
|
||||
alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size);
|
||||
alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
|
||||
rounddown_pow_of_two(alloc_blocks));
|
||||
|
||||
xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
|
||||
freesp = mp->m_sb.sb_fdblocks;
|
||||
if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
|
||||
shift = 2;
|
||||
if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
|
||||
shift++;
|
||||
if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
|
||||
shift++;
|
||||
if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
|
||||
shift++;
|
||||
if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
|
||||
shift++;
|
||||
}
|
||||
if (shift)
|
||||
alloc_blocks >>= shift;
|
||||
}
|
||||
|
||||
if (alloc_blocks < mp->m_writeio_blocks)
|
||||
alloc_blocks = mp->m_writeio_blocks;
|
||||
|
||||
return alloc_blocks;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_iomap_write_delay(
|
||||
xfs_inode_t *ip,
|
||||
xfs_off_t offset,
|
||||
size_t count,
|
||||
int ioflag,
|
||||
xfs_bmbt_irec_t *ret_imap,
|
||||
int *nmaps)
|
||||
xfs_bmbt_irec_t *ret_imap)
|
||||
{
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
xfs_fileoff_t offset_fsb;
|
||||
|
@ -469,16 +396,19 @@ xfs_iomap_write_delay(
|
|||
extsz = xfs_get_extsz_hint(ip);
|
||||
offset_fsb = XFS_B_TO_FSBT(mp, offset);
|
||||
|
||||
|
||||
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
|
||||
ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
|
||||
imap, XFS_WRITE_IMAPS, &prealloc);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
retry:
|
||||
if (prealloc) {
|
||||
xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
|
||||
|
||||
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
|
||||
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
|
||||
last_fsb = ioalign + mp->m_writeio_blocks;
|
||||
last_fsb = ioalign + alloc_blocks;
|
||||
} else {
|
||||
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
|
||||
}
|
||||
|
@ -496,22 +426,31 @@ xfs_iomap_write_delay(
|
|||
XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
|
||||
XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
|
||||
&nimaps, NULL);
|
||||
if (error && (error != ENOSPC))
|
||||
switch (error) {
|
||||
case 0:
|
||||
case ENOSPC:
|
||||
case EDQUOT:
|
||||
break;
|
||||
default:
|
||||
return XFS_ERROR(error);
|
||||
}
|
||||
|
||||
/*
|
||||
* If bmapi returned us nothing, and if we didn't get back EDQUOT,
|
||||
* then we must have run out of space - flush all other inodes with
|
||||
* delalloc blocks and retry without EOF preallocation.
|
||||
* If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
|
||||
* ENOSPC, flush all other inodes with delalloc blocks to free up
|
||||
* some of the excess reserved metadata space. For both cases, retry
|
||||
* without EOF preallocation.
|
||||
*/
|
||||
if (nimaps == 0) {
|
||||
trace_xfs_delalloc_enospc(ip, offset, count);
|
||||
if (flushed)
|
||||
return XFS_ERROR(ENOSPC);
|
||||
return XFS_ERROR(error ? error : ENOSPC);
|
||||
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
xfs_flush_inodes(ip);
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (error == ENOSPC) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
xfs_flush_inodes(ip);
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
flushed = 1;
|
||||
error = 0;
|
||||
|
@ -523,8 +462,6 @@ xfs_iomap_write_delay(
|
|||
return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
|
||||
|
||||
*ret_imap = imap[0];
|
||||
*nmaps = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -538,13 +475,12 @@ xfs_iomap_write_delay(
|
|||
* We no longer bother to look at the incoming map - all we have to
|
||||
* guarantee is that whatever we allocate fills the required range.
|
||||
*/
|
||||
STATIC int
|
||||
int
|
||||
xfs_iomap_write_allocate(
|
||||
xfs_inode_t *ip,
|
||||
xfs_off_t offset,
|
||||
size_t count,
|
||||
xfs_bmbt_irec_t *imap,
|
||||
int *retmap)
|
||||
xfs_bmbt_irec_t *imap)
|
||||
{
|
||||
xfs_mount_t *mp = ip->i_mount;
|
||||
xfs_fileoff_t offset_fsb, last_block;
|
||||
|
@ -557,8 +493,6 @@ xfs_iomap_write_allocate(
|
|||
int error = 0;
|
||||
int nres;
|
||||
|
||||
*retmap = 0;
|
||||
|
||||
/*
|
||||
* Make sure that the dquots are there.
|
||||
*/
|
||||
|
@ -680,7 +614,6 @@ xfs_iomap_write_allocate(
|
|||
if ((offset_fsb >= imap->br_startoff) &&
|
||||
(offset_fsb < (imap->br_startoff +
|
||||
imap->br_blockcount))) {
|
||||
*retmap = 1;
|
||||
XFS_STATS_INC(xs_xstrat_quick);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -18,30 +18,15 @@
|
|||
#ifndef __XFS_IOMAP_H__
|
||||
#define __XFS_IOMAP_H__
|
||||
|
||||
/* base extent manipulation calls */
|
||||
#define BMAPI_READ (1 << 0) /* read extents */
|
||||
#define BMAPI_WRITE (1 << 1) /* create extents */
|
||||
#define BMAPI_ALLOCATE (1 << 2) /* delayed allocate to real extents */
|
||||
|
||||
/* modifiers */
|
||||
#define BMAPI_IGNSTATE (1 << 4) /* ignore unwritten state on read */
|
||||
#define BMAPI_DIRECT (1 << 5) /* direct instead of buffered write */
|
||||
#define BMAPI_MMA (1 << 6) /* allocate for mmap write */
|
||||
#define BMAPI_TRYLOCK (1 << 7) /* non-blocking request */
|
||||
|
||||
#define BMAPI_FLAGS \
|
||||
{ BMAPI_READ, "READ" }, \
|
||||
{ BMAPI_WRITE, "WRITE" }, \
|
||||
{ BMAPI_ALLOCATE, "ALLOCATE" }, \
|
||||
{ BMAPI_IGNSTATE, "IGNSTATE" }, \
|
||||
{ BMAPI_DIRECT, "DIRECT" }, \
|
||||
{ BMAPI_TRYLOCK, "TRYLOCK" }
|
||||
|
||||
struct xfs_inode;
|
||||
struct xfs_bmbt_irec;
|
||||
|
||||
extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int,
|
||||
struct xfs_bmbt_irec *, int *, int *);
|
||||
extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
|
||||
struct xfs_bmbt_irec *, int);
|
||||
extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
|
||||
struct xfs_bmbt_irec *);
|
||||
extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
|
||||
struct xfs_bmbt_irec *);
|
||||
extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
|
||||
|
||||
#endif /* __XFS_IOMAP_H__*/
|
||||
|
|
737
fs/xfs/xfs_log.c
737
fs/xfs/xfs_log.c
File diff suppressed because it is too large
Load Diff
|
@ -61,7 +61,7 @@ xlog_cil_init(
|
|||
INIT_LIST_HEAD(&cil->xc_committing);
|
||||
spin_lock_init(&cil->xc_cil_lock);
|
||||
init_rwsem(&cil->xc_ctx_lock);
|
||||
sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait");
|
||||
init_waitqueue_head(&cil->xc_commit_wait);
|
||||
|
||||
INIT_LIST_HEAD(&ctx->committing);
|
||||
INIT_LIST_HEAD(&ctx->busy_extents);
|
||||
|
@ -361,15 +361,10 @@ xlog_cil_committed(
|
|||
int abort)
|
||||
{
|
||||
struct xfs_cil_ctx *ctx = args;
|
||||
struct xfs_log_vec *lv;
|
||||
int abortflag = abort ? XFS_LI_ABORTED : 0;
|
||||
struct xfs_busy_extent *busyp, *n;
|
||||
|
||||
/* unpin all the log items */
|
||||
for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) {
|
||||
xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
|
||||
abortflag);
|
||||
}
|
||||
xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
|
||||
ctx->start_lsn, abort);
|
||||
|
||||
list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
|
||||
xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
|
||||
|
@ -568,7 +563,7 @@ xlog_cil_push(
|
|||
* It is still being pushed! Wait for the push to
|
||||
* complete, then start again from the beginning.
|
||||
*/
|
||||
sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
|
||||
xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
|
@ -592,7 +587,7 @@ xlog_cil_push(
|
|||
*/
|
||||
spin_lock(&cil->xc_cil_lock);
|
||||
ctx->commit_lsn = commit_lsn;
|
||||
sv_broadcast(&cil->xc_commit_wait);
|
||||
wake_up_all(&cil->xc_commit_wait);
|
||||
spin_unlock(&cil->xc_cil_lock);
|
||||
|
||||
/* release the hounds! */
|
||||
|
@ -757,7 +752,7 @@ xlog_cil_force_lsn(
|
|||
* It is still being pushed! Wait for the push to
|
||||
* complete, then start again from the beginning.
|
||||
*/
|
||||
sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
|
||||
xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
|
||||
goto restart;
|
||||
}
|
||||
if (ctx->sequence != sequence)
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
struct xfs_buf;
|
||||
struct log;
|
||||
struct xlog_ticket;
|
||||
struct xfs_buf_cancel;
|
||||
struct xfs_mount;
|
||||
|
||||
/*
|
||||
|
@ -54,7 +53,6 @@ struct xfs_mount;
|
|||
BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
|
||||
XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
|
||||
|
||||
|
||||
static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
|
||||
{
|
||||
return ((xfs_lsn_t)cycle << 32) | block;
|
||||
|
@ -133,12 +131,10 @@ static inline uint xlog_get_client_id(__be32 i)
|
|||
*/
|
||||
#define XLOG_TIC_INITED 0x1 /* has been initialized */
|
||||
#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
|
||||
#define XLOG_TIC_IN_Q 0x4
|
||||
|
||||
#define XLOG_TIC_FLAGS \
|
||||
{ XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
|
||||
{ XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \
|
||||
{ XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
|
||||
{ XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
|
@ -244,9 +240,8 @@ typedef struct xlog_res {
|
|||
} xlog_res_t;
|
||||
|
||||
typedef struct xlog_ticket {
|
||||
sv_t t_wait; /* ticket wait queue : 20 */
|
||||
struct xlog_ticket *t_next; /* :4|8 */
|
||||
struct xlog_ticket *t_prev; /* :4|8 */
|
||||
wait_queue_head_t t_wait; /* ticket wait queue */
|
||||
struct list_head t_queue; /* reserve/write queue */
|
||||
xlog_tid_t t_tid; /* transaction identifier : 4 */
|
||||
atomic_t t_ref; /* ticket reference count : 4 */
|
||||
int t_curr_res; /* current reservation in bytes : 4 */
|
||||
|
@ -353,8 +348,8 @@ typedef union xlog_in_core2 {
|
|||
* and move everything else out to subsequent cachelines.
|
||||
*/
|
||||
typedef struct xlog_in_core {
|
||||
sv_t ic_force_wait;
|
||||
sv_t ic_write_wait;
|
||||
wait_queue_head_t ic_force_wait;
|
||||
wait_queue_head_t ic_write_wait;
|
||||
struct xlog_in_core *ic_next;
|
||||
struct xlog_in_core *ic_prev;
|
||||
struct xfs_buf *ic_bp;
|
||||
|
@ -421,7 +416,7 @@ struct xfs_cil {
|
|||
struct xfs_cil_ctx *xc_ctx;
|
||||
struct rw_semaphore xc_ctx_lock;
|
||||
struct list_head xc_committing;
|
||||
sv_t xc_commit_wait;
|
||||
wait_queue_head_t xc_commit_wait;
|
||||
xfs_lsn_t xc_current_sequence;
|
||||
};
|
||||
|
||||
|
@ -491,7 +486,7 @@ typedef struct log {
|
|||
struct xfs_buftarg *l_targ; /* buftarg of log */
|
||||
uint l_flags;
|
||||
uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
|
||||
struct xfs_buf_cancel **l_buf_cancel_table;
|
||||
struct list_head *l_buf_cancel_table;
|
||||
int l_iclog_hsize; /* size of iclog header */
|
||||
int l_iclog_heads; /* # of iclog header sectors */
|
||||
uint l_sectBBsize; /* sector size in BBs (2^n) */
|
||||
|
@ -503,29 +498,40 @@ typedef struct log {
|
|||
int l_logBBsize; /* size of log in BB chunks */
|
||||
|
||||
/* The following block of fields are changed while holding icloglock */
|
||||
sv_t l_flush_wait ____cacheline_aligned_in_smp;
|
||||
wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp;
|
||||
/* waiting for iclog flush */
|
||||
int l_covered_state;/* state of "covering disk
|
||||
* log entries" */
|
||||
xlog_in_core_t *l_iclog; /* head log queue */
|
||||
spinlock_t l_icloglock; /* grab to change iclog state */
|
||||
xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
|
||||
* buffers */
|
||||
xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */
|
||||
int l_curr_cycle; /* Cycle number of log writes */
|
||||
int l_prev_cycle; /* Cycle number before last
|
||||
* block increment */
|
||||
int l_curr_block; /* current logical log block */
|
||||
int l_prev_block; /* previous logical log block */
|
||||
|
||||
/* The following block of fields are changed while holding grant_lock */
|
||||
spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
|
||||
xlog_ticket_t *l_reserve_headq;
|
||||
xlog_ticket_t *l_write_headq;
|
||||
int l_grant_reserve_cycle;
|
||||
int l_grant_reserve_bytes;
|
||||
int l_grant_write_cycle;
|
||||
int l_grant_write_bytes;
|
||||
/*
|
||||
* l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
|
||||
* read without needing to hold specific locks. To avoid operations
|
||||
* contending with other hot objects, place each of them on a separate
|
||||
* cacheline.
|
||||
*/
|
||||
/* lsn of last LR on disk */
|
||||
atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
|
||||
/* lsn of 1st LR with unflushed buffers */
|
||||
atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
|
||||
|
||||
/*
|
||||
* ticket grant locks, queues and accounting have their own cachelines
|
||||
* as these are quite hot and can be operated on concurrently.
|
||||
*/
|
||||
spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
|
||||
struct list_head l_reserveq;
|
||||
atomic64_t l_grant_reserve_head;
|
||||
|
||||
spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
|
||||
struct list_head l_writeq;
|
||||
atomic64_t l_grant_write_head;
|
||||
|
||||
/* The following fields are used for debugging; need to hold icloglock */
|
||||
#ifdef DEBUG
|
||||
|
@ -534,6 +540,9 @@ typedef struct log {
|
|||
|
||||
} xlog_t;
|
||||
|
||||
/*
 * Return the address of the buffer cancel table bucket for disk block @blkno:
 * l_buf_cancel_table offset by blkno modulo XLOG_BC_TABLE_SIZE. Both arguments
 * are parenthesised so that an expression argument is cast and reduced as a
 * whole rather than having the cast bind to its first operand only.
 */
#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
	((log)->l_buf_cancel_table + ((__uint64_t)(blkno) % XLOG_BC_TABLE_SIZE))
|
||||
|
||||
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
|
||||
|
||||
/* common routines */
|
||||
|
@ -561,6 +570,61 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
|
|||
struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
|
||||
xlog_in_core_t **commit_iclog, uint flags);
|
||||
|
||||
/*
|
||||
* When we crack an atomic LSN, we sample it first so that the value will not
|
||||
* change while we are cracking it into the component values. This means we
|
||||
* will always get consistent component values to work from. This should always
|
||||
* be used to smaple and crack LSNs taht are stored and updated in atomic
|
||||
* variables.
|
||||
*/
|
||||
static inline void
|
||||
xlog_crack_atomic_lsn(atomic64_t *lsn, uint *cycle, uint *block)
|
||||
{
|
||||
xfs_lsn_t val = atomic64_read(lsn);
|
||||
|
||||
*cycle = CYCLE_LSN(val);
|
||||
*block = BLOCK_LSN(val);
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate and assign a value to an atomic LSN variable from component pieces.
|
||||
*/
|
||||
static inline void
|
||||
xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
|
||||
{
|
||||
atomic64_set(lsn, xlog_assign_lsn(cycle, block));
|
||||
}
|
||||
|
||||
/*
 * Split a grant head value into its components: the upper 32 bits are stored
 * in *cycle and the lower 32 bits in *space.
 *
 * The grant head is sampled before it is cracked, so callers pass the sampled
 * value here; both components then come from the same snapshot and are always
 * consistent with each other.
 */
static inline void
xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
{
	int64_t	upper = val >> 32;
	int64_t	lower = val & 0xffffffff;

	*cycle = upper;
	*space = lower;
}
|
||||
|
||||
static inline void
|
||||
xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
|
||||
{
|
||||
xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
|
||||
}
|
||||
|
||||
/*
 * Pack @cycle and @space into a single 64-bit grant head value: @cycle is
 * placed in the upper 32 bits and @space is OR-ed into the result.
 */
static inline int64_t
xlog_assign_grant_head_val(int cycle, int space)
{
	int64_t	head = (int64_t)cycle << 32;

	head |= space;
	return head;
}
|
||||
|
||||
static inline void
|
||||
xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
|
||||
{
|
||||
atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
|
||||
}
|
||||
|
||||
/*
|
||||
* Committed Item List interfaces
|
||||
*/
|
||||
|
@ -585,6 +649,21 @@ xlog_cil_force(struct log *log)
|
|||
*/
|
||||
#define XLOG_UNMOUNT_REC_TYPE (-1U)
|
||||
|
||||
/*
|
||||
* Wrapper function for waiting on a wait queue serialised against wakeups
|
||||
* by a spinlock. This matches the semantics of all the wait queues used in the
|
||||
* log code.
|
||||
*/
|
||||
static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
|
||||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
add_wait_queue_exclusive(wq, &wait);
|
||||
__set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
spin_unlock(lock);
|
||||
schedule();
|
||||
remove_wait_queue(wq, &wait);
|
||||
}
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* __XFS_LOG_PRIV_H__ */
|
||||
|
|
|
@ -52,6 +52,17 @@ STATIC void xlog_recover_check_summary(xlog_t *);
|
|||
#define xlog_recover_check_summary(log)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This structure is used during recovery to record the buf log items which
|
||||
* have been canceled and should not be replayed.
|
||||
*/
|
||||
struct xfs_buf_cancel {
|
||||
xfs_daddr_t bc_blkno; /* blf_blkno of the cancelled buffer; also selects the hash bucket */
|
||||
uint bc_len; /* blf_len of the cancelled buffer; matched together with bc_blkno */
|
||||
int bc_refcount; /* starts at 1, bumped for each identical cancel record; freed at 0 */
|
||||
struct list_head bc_list; /* links this record into its l_buf_cancel_table bucket */
|
||||
};
|
||||
|
||||
/*
|
||||
* Sector aligned buffer routines for buffer create/read/write/access
|
||||
*/
|
||||
|
@ -925,12 +936,12 @@ xlog_find_tail(
|
|||
log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
|
||||
if (found == 2)
|
||||
log->l_curr_cycle++;
|
||||
log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
|
||||
log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
|
||||
log->l_grant_reserve_cycle = log->l_curr_cycle;
|
||||
log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
|
||||
log->l_grant_write_cycle = log->l_curr_cycle;
|
||||
log->l_grant_write_bytes = BBTOB(log->l_curr_block);
|
||||
atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
|
||||
atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
|
||||
xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle,
|
||||
BBTOB(log->l_curr_block));
|
||||
xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle,
|
||||
BBTOB(log->l_curr_block));
|
||||
|
||||
/*
|
||||
* Look for unmount record. If we find it, then we know there
|
||||
|
@ -960,7 +971,7 @@ xlog_find_tail(
|
|||
}
|
||||
after_umount_blk = (i + hblks + (int)
|
||||
BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
|
||||
tail_lsn = log->l_tail_lsn;
|
||||
tail_lsn = atomic64_read(&log->l_tail_lsn);
|
||||
if (*head_blk == after_umount_blk &&
|
||||
be32_to_cpu(rhead->h_num_logops) == 1) {
|
||||
umount_data_blk = (i + hblks) % log->l_logBBsize;
|
||||
|
@ -975,12 +986,10 @@ xlog_find_tail(
|
|||
* log records will point recovery to after the
|
||||
* current unmount record.
|
||||
*/
|
||||
log->l_tail_lsn =
|
||||
xlog_assign_lsn(log->l_curr_cycle,
|
||||
after_umount_blk);
|
||||
log->l_last_sync_lsn =
|
||||
xlog_assign_lsn(log->l_curr_cycle,
|
||||
after_umount_blk);
|
||||
xlog_assign_atomic_lsn(&log->l_tail_lsn,
|
||||
log->l_curr_cycle, after_umount_blk);
|
||||
xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
|
||||
log->l_curr_cycle, after_umount_blk);
|
||||
*tail_blk = after_umount_blk;
|
||||
|
||||
/*
|
||||
|
@ -1605,82 +1614,45 @@ xlog_recover_reorder_trans(
|
|||
* record in the table to tell us how many times we expect to see this
|
||||
* record during the second pass.
|
||||
*/
|
||||
STATIC void
|
||||
xlog_recover_do_buffer_pass1(
|
||||
xlog_t *log,
|
||||
xfs_buf_log_format_t *buf_f)
|
||||
STATIC int
|
||||
xlog_recover_buffer_pass1(
|
||||
struct log *log,
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_buf_cancel_t *bcp;
|
||||
xfs_buf_cancel_t *nextp;
|
||||
xfs_buf_cancel_t *prevp;
|
||||
xfs_buf_cancel_t **bucket;
|
||||
xfs_daddr_t blkno = 0;
|
||||
uint len = 0;
|
||||
ushort flags = 0;
|
||||
|
||||
switch (buf_f->blf_type) {
|
||||
case XFS_LI_BUF:
|
||||
blkno = buf_f->blf_blkno;
|
||||
len = buf_f->blf_len;
|
||||
flags = buf_f->blf_flags;
|
||||
break;
|
||||
}
|
||||
xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
|
||||
struct list_head *bucket;
|
||||
struct xfs_buf_cancel *bcp;
|
||||
|
||||
/*
|
||||
* If this isn't a cancel buffer item, then just return.
|
||||
*/
|
||||
if (!(flags & XFS_BLF_CANCEL)) {
|
||||
if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
|
||||
trace_xfs_log_recover_buf_not_cancel(log, buf_f);
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert an xfs_buf_cancel record into the hash table of
|
||||
* them. If there is already an identical record, bump
|
||||
* its reference count.
|
||||
* Insert an xfs_buf_cancel record into the hash table of them.
|
||||
* If there is already an identical record, bump its reference count.
|
||||
*/
|
||||
bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
|
||||
XLOG_BC_TABLE_SIZE];
|
||||
/*
|
||||
* If the hash bucket is empty then just insert a new record into
|
||||
* the bucket.
|
||||
*/
|
||||
if (*bucket == NULL) {
|
||||
bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
|
||||
KM_SLEEP);
|
||||
bcp->bc_blkno = blkno;
|
||||
bcp->bc_len = len;
|
||||
bcp->bc_refcount = 1;
|
||||
bcp->bc_next = NULL;
|
||||
*bucket = bcp;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The hash bucket is not empty, so search for duplicates of our
|
||||
* record. If we find one them just bump its refcount. If not
|
||||
* then add us at the end of the list.
|
||||
*/
|
||||
prevp = NULL;
|
||||
nextp = *bucket;
|
||||
while (nextp != NULL) {
|
||||
if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
|
||||
nextp->bc_refcount++;
|
||||
bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
|
||||
list_for_each_entry(bcp, bucket, bc_list) {
|
||||
if (bcp->bc_blkno == buf_f->blf_blkno &&
|
||||
bcp->bc_len == buf_f->blf_len) {
|
||||
bcp->bc_refcount++;
|
||||
trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
prevp = nextp;
|
||||
nextp = nextp->bc_next;
|
||||
}
|
||||
ASSERT(prevp != NULL);
|
||||
bcp = (xfs_buf_cancel_t *)kmem_alloc(sizeof(xfs_buf_cancel_t),
|
||||
KM_SLEEP);
|
||||
bcp->bc_blkno = blkno;
|
||||
bcp->bc_len = len;
|
||||
|
||||
bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
|
||||
bcp->bc_blkno = buf_f->blf_blkno;
|
||||
bcp->bc_len = buf_f->blf_len;
|
||||
bcp->bc_refcount = 1;
|
||||
bcp->bc_next = NULL;
|
||||
prevp->bc_next = bcp;
|
||||
list_add_tail(&bcp->bc_list, bucket);
|
||||
|
||||
trace_xfs_log_recover_buf_cancel_add(log, buf_f);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1698,14 +1670,13 @@ xlog_recover_do_buffer_pass1(
|
|||
*/
|
||||
STATIC int
|
||||
xlog_check_buffer_cancelled(
|
||||
xlog_t *log,
|
||||
struct log *log,
|
||||
xfs_daddr_t blkno,
|
||||
uint len,
|
||||
ushort flags)
|
||||
{
|
||||
xfs_buf_cancel_t *bcp;
|
||||
xfs_buf_cancel_t *prevp;
|
||||
xfs_buf_cancel_t **bucket;
|
||||
struct list_head *bucket;
|
||||
struct xfs_buf_cancel *bcp;
|
||||
|
||||
if (log->l_buf_cancel_table == NULL) {
|
||||
/*
|
||||
|
@ -1716,128 +1687,70 @@ xlog_check_buffer_cancelled(
|
|||
return 0;
|
||||
}
|
||||
|
||||
bucket = &log->l_buf_cancel_table[(__uint64_t)blkno %
|
||||
XLOG_BC_TABLE_SIZE];
|
||||
bcp = *bucket;
|
||||
if (bcp == NULL) {
|
||||
/*
|
||||
* There is no corresponding entry in the table built
|
||||
* in pass one, so this buffer has not been cancelled.
|
||||
*/
|
||||
ASSERT(!(flags & XFS_BLF_CANCEL));
|
||||
return 0;
|
||||
/*
|
||||
* Search for an entry in the cancel table that matches our buffer.
|
||||
*/
|
||||
bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
|
||||
list_for_each_entry(bcp, bucket, bc_list) {
|
||||
if (bcp->bc_blkno == blkno && bcp->bc_len == len)
|
||||
goto found;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search for an entry in the buffer cancel table that
|
||||
* matches our buffer.
|
||||
*/
|
||||
prevp = NULL;
|
||||
while (bcp != NULL) {
|
||||
if (bcp->bc_blkno == blkno && bcp->bc_len == len) {
|
||||
/*
|
||||
* We've go a match, so return 1 so that the
|
||||
* recovery of this buffer is cancelled.
|
||||
* If this buffer is actually a buffer cancel
|
||||
* log item, then decrement the refcount on the
|
||||
* one in the table and remove it if this is the
|
||||
* last reference.
|
||||
*/
|
||||
if (flags & XFS_BLF_CANCEL) {
|
||||
bcp->bc_refcount--;
|
||||
if (bcp->bc_refcount == 0) {
|
||||
if (prevp == NULL) {
|
||||
*bucket = bcp->bc_next;
|
||||
} else {
|
||||
prevp->bc_next = bcp->bc_next;
|
||||
}
|
||||
kmem_free(bcp);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
prevp = bcp;
|
||||
bcp = bcp->bc_next;
|
||||
}
|
||||
/*
|
||||
* We didn't find a corresponding entry in the table, so
|
||||
* return 0 so that the buffer is NOT cancelled.
|
||||
* We didn't find a corresponding entry in the table, so return 0 so
|
||||
* that the buffer is NOT cancelled.
|
||||
*/
|
||||
ASSERT(!(flags & XFS_BLF_CANCEL));
|
||||
return 0;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xlog_recover_do_buffer_pass2(
|
||||
xlog_t *log,
|
||||
xfs_buf_log_format_t *buf_f)
|
||||
{
|
||||
xfs_daddr_t blkno = 0;
|
||||
ushort flags = 0;
|
||||
uint len = 0;
|
||||
|
||||
switch (buf_f->blf_type) {
|
||||
case XFS_LI_BUF:
|
||||
blkno = buf_f->blf_blkno;
|
||||
flags = buf_f->blf_flags;
|
||||
len = buf_f->blf_len;
|
||||
break;
|
||||
found:
|
||||
/*
|
||||
* We've got a match, so return 1 so that the recovery of this buffer
|
||||
* is cancelled. If this buffer is actually a buffer cancel log
|
||||
* item, then decrement the refcount on the one in the table and
|
||||
* remove it if this is the last reference.
|
||||
*/
|
||||
if (flags & XFS_BLF_CANCEL) {
|
||||
if (--bcp->bc_refcount == 0) {
|
||||
list_del(&bcp->bc_list);
|
||||
kmem_free(bcp);
|
||||
}
|
||||
}
|
||||
|
||||
return xlog_check_buffer_cancelled(log, blkno, len, flags);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform recovery for a buffer full of inodes. In these buffers,
|
||||
* the only data which should be recovered is that which corresponds
|
||||
* to the di_next_unlinked pointers in the on disk inode structures.
|
||||
* The rest of the data for the inodes is always logged through the
|
||||
* inodes themselves rather than the inode buffer and is recovered
|
||||
* in xlog_recover_do_inode_trans().
|
||||
* Perform recovery for a buffer full of inodes. In these buffers, the only
|
||||
* data which should be recovered is that which corresponds to the
|
||||
* di_next_unlinked pointers in the on disk inode structures. The rest of the
|
||||
* data for the inodes is always logged through the inodes themselves rather
|
||||
* than the inode buffer and is recovered in xlog_recover_inode_pass2().
|
||||
*
|
||||
* The only time when buffers full of inodes are fully recovered is
|
||||
* when the buffer is full of newly allocated inodes. In this case
|
||||
* the buffer will not be marked as an inode buffer and so will be
|
||||
* sent to xlog_recover_do_reg_buffer() below during recovery.
|
||||
* The only time when buffers full of inodes are fully recovered is when the
|
||||
* buffer is full of newly allocated inodes. In this case the buffer will
|
||||
* not be marked as an inode buffer and so will be sent to
|
||||
* xlog_recover_do_reg_buffer() below during recovery.
|
||||
*/
|
||||
STATIC int
|
||||
xlog_recover_do_inode_buffer(
|
||||
xfs_mount_t *mp,
|
||||
struct xfs_mount *mp,
|
||||
xlog_recover_item_t *item,
|
||||
xfs_buf_t *bp,
|
||||
struct xfs_buf *bp,
|
||||
xfs_buf_log_format_t *buf_f)
|
||||
{
|
||||
int i;
|
||||
int item_index;
|
||||
int bit;
|
||||
int nbits;
|
||||
int reg_buf_offset;
|
||||
int reg_buf_bytes;
|
||||
int item_index = 0;
|
||||
int bit = 0;
|
||||
int nbits = 0;
|
||||
int reg_buf_offset = 0;
|
||||
int reg_buf_bytes = 0;
|
||||
int next_unlinked_offset;
|
||||
int inodes_per_buf;
|
||||
xfs_agino_t *logged_nextp;
|
||||
xfs_agino_t *buffer_nextp;
|
||||
unsigned int *data_map = NULL;
|
||||
unsigned int map_size = 0;
|
||||
|
||||
trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
|
||||
|
||||
switch (buf_f->blf_type) {
|
||||
case XFS_LI_BUF:
|
||||
data_map = buf_f->blf_data_map;
|
||||
map_size = buf_f->blf_map_size;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* Set the variables corresponding to the current region to
|
||||
* 0 so that we'll initialize them on the first pass through
|
||||
* the loop.
|
||||
*/
|
||||
reg_buf_offset = 0;
|
||||
reg_buf_bytes = 0;
|
||||
bit = 0;
|
||||
nbits = 0;
|
||||
item_index = 0;
|
||||
inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
|
||||
for (i = 0; i < inodes_per_buf; i++) {
|
||||
next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
|
||||
|
@ -1852,18 +1765,18 @@ xlog_recover_do_inode_buffer(
|
|||
* the current di_next_unlinked field.
|
||||
*/
|
||||
bit += nbits;
|
||||
bit = xfs_next_bit(data_map, map_size, bit);
|
||||
bit = xfs_next_bit(buf_f->blf_data_map,
|
||||
buf_f->blf_map_size, bit);
|
||||
|
||||
/*
|
||||
* If there are no more logged regions in the
|
||||
* buffer, then we're done.
|
||||
*/
|
||||
if (bit == -1) {
|
||||
if (bit == -1)
|
||||
return 0;
|
||||
}
|
||||
|
||||
nbits = xfs_contig_bits(data_map, map_size,
|
||||
bit);
|
||||
nbits = xfs_contig_bits(buf_f->blf_data_map,
|
||||
buf_f->blf_map_size, bit);
|
||||
ASSERT(nbits > 0);
|
||||
reg_buf_offset = bit << XFS_BLF_SHIFT;
|
||||
reg_buf_bytes = nbits << XFS_BLF_SHIFT;
|
||||
|
@ -1875,9 +1788,8 @@ xlog_recover_do_inode_buffer(
|
|||
* di_next_unlinked field, then move on to the next
|
||||
* di_next_unlinked field.
|
||||
*/
|
||||
if (next_unlinked_offset < reg_buf_offset) {
|
||||
if (next_unlinked_offset < reg_buf_offset)
|
||||
continue;
|
||||
}
|
||||
|
||||
ASSERT(item->ri_buf[item_index].i_addr != NULL);
|
||||
ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
|
||||
|
@ -1913,36 +1825,29 @@ xlog_recover_do_inode_buffer(
|
|||
* given buffer. The bitmap in the buf log format structure indicates
|
||||
* where to place the logged data.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
STATIC void
|
||||
xlog_recover_do_reg_buffer(
|
||||
struct xfs_mount *mp,
|
||||
xlog_recover_item_t *item,
|
||||
xfs_buf_t *bp,
|
||||
struct xfs_buf *bp,
|
||||
xfs_buf_log_format_t *buf_f)
|
||||
{
|
||||
int i;
|
||||
int bit;
|
||||
int nbits;
|
||||
unsigned int *data_map = NULL;
|
||||
unsigned int map_size = 0;
|
||||
int error;
|
||||
|
||||
trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
|
||||
|
||||
switch (buf_f->blf_type) {
|
||||
case XFS_LI_BUF:
|
||||
data_map = buf_f->blf_data_map;
|
||||
map_size = buf_f->blf_map_size;
|
||||
break;
|
||||
}
|
||||
bit = 0;
|
||||
i = 1; /* 0 is the buf format structure */
|
||||
while (1) {
|
||||
bit = xfs_next_bit(data_map, map_size, bit);
|
||||
bit = xfs_next_bit(buf_f->blf_data_map,
|
||||
buf_f->blf_map_size, bit);
|
||||
if (bit == -1)
|
||||
break;
|
||||
nbits = xfs_contig_bits(data_map, map_size, bit);
|
||||
nbits = xfs_contig_bits(buf_f->blf_data_map,
|
||||
buf_f->blf_map_size, bit);
|
||||
ASSERT(nbits > 0);
|
||||
ASSERT(item->ri_buf[i].i_addr != NULL);
|
||||
ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
|
||||
|
@ -2176,77 +2081,46 @@ xlog_recover_do_dquot_buffer(
|
|||
* for more details on the implementation of the table of cancel records.
|
||||
*/
|
||||
STATIC int
|
||||
xlog_recover_do_buffer_trans(
|
||||
xlog_recover_buffer_pass2(
|
||||
xlog_t *log,
|
||||
xlog_recover_item_t *item,
|
||||
int pass)
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
|
||||
xfs_mount_t *mp;
|
||||
xfs_mount_t *mp = log->l_mp;
|
||||
xfs_buf_t *bp;
|
||||
int error;
|
||||
int cancel;
|
||||
xfs_daddr_t blkno;
|
||||
int len;
|
||||
ushort flags;
|
||||
uint buf_flags;
|
||||
|
||||
if (pass == XLOG_RECOVER_PASS1) {
|
||||
/*
|
||||
* In this pass we're only looking for buf items
|
||||
* with the XFS_BLF_CANCEL bit set.
|
||||
*/
|
||||
xlog_recover_do_buffer_pass1(log, buf_f);
|
||||
/*
|
||||
* In this pass we only want to recover all the buffers which have
|
||||
* not been cancelled and are not cancellation buffers themselves.
|
||||
*/
|
||||
if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno,
|
||||
buf_f->blf_len, buf_f->blf_flags)) {
|
||||
trace_xfs_log_recover_buf_cancel(log, buf_f);
|
||||
return 0;
|
||||
} else {
|
||||
/*
|
||||
* In this pass we want to recover all the buffers
|
||||
* which have not been cancelled and are not
|
||||
* cancellation buffers themselves. The routine
|
||||
* we call here will tell us whether or not to
|
||||
* continue with the replay of this buffer.
|
||||
*/
|
||||
cancel = xlog_recover_do_buffer_pass2(log, buf_f);
|
||||
if (cancel) {
|
||||
trace_xfs_log_recover_buf_cancel(log, buf_f);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
trace_xfs_log_recover_buf_recover(log, buf_f);
|
||||
switch (buf_f->blf_type) {
|
||||
case XFS_LI_BUF:
|
||||
blkno = buf_f->blf_blkno;
|
||||
len = buf_f->blf_len;
|
||||
flags = buf_f->blf_flags;
|
||||
break;
|
||||
default:
|
||||
xfs_fs_cmn_err(CE_ALERT, log->l_mp,
|
||||
"xfs_log_recover: unknown buffer type 0x%x, logdev %s",
|
||||
buf_f->blf_type, log->l_mp->m_logname ?
|
||||
log->l_mp->m_logname : "internal");
|
||||
XFS_ERROR_REPORT("xlog_recover_do_buffer_trans",
|
||||
XFS_ERRLEVEL_LOW, log->l_mp);
|
||||
return XFS_ERROR(EFSCORRUPTED);
|
||||
}
|
||||
|
||||
mp = log->l_mp;
|
||||
trace_xfs_log_recover_buf_recover(log, buf_f);
|
||||
|
||||
buf_flags = XBF_LOCK;
|
||||
if (!(flags & XFS_BLF_INODE_BUF))
|
||||
if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF))
|
||||
buf_flags |= XBF_MAPPED;
|
||||
|
||||
bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
|
||||
bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
|
||||
buf_flags);
|
||||
if (XFS_BUF_ISERROR(bp)) {
|
||||
xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
|
||||
bp, blkno);
|
||||
xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
|
||||
bp, buf_f->blf_blkno);
|
||||
error = XFS_BUF_GETERROR(bp);
|
||||
xfs_buf_relse(bp);
|
||||
return error;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
if (flags & XFS_BLF_INODE_BUF) {
|
||||
if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
|
||||
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
|
||||
} else if (flags &
|
||||
} else if (buf_f->blf_flags &
|
||||
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
|
||||
xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
|
||||
} else {
|
||||
|
@ -2286,16 +2160,14 @@ xlog_recover_do_buffer_trans(
|
|||
}
|
||||
|
||||
STATIC int
|
||||
xlog_recover_do_inode_trans(
|
||||
xlog_recover_inode_pass2(
|
||||
xlog_t *log,
|
||||
xlog_recover_item_t *item,
|
||||
int pass)
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_inode_log_format_t *in_f;
|
||||
xfs_mount_t *mp;
|
||||
xfs_mount_t *mp = log->l_mp;
|
||||
xfs_buf_t *bp;
|
||||
xfs_dinode_t *dip;
|
||||
xfs_ino_t ino;
|
||||
int len;
|
||||
xfs_caddr_t src;
|
||||
xfs_caddr_t dest;
|
||||
|
@ -2305,10 +2177,6 @@ xlog_recover_do_inode_trans(
|
|||
xfs_icdinode_t *dicp;
|
||||
int need_free = 0;
|
||||
|
||||
if (pass == XLOG_RECOVER_PASS1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
|
||||
in_f = item->ri_buf[0].i_addr;
|
||||
} else {
|
||||
|
@ -2318,8 +2186,6 @@ xlog_recover_do_inode_trans(
|
|||
if (error)
|
||||
goto error;
|
||||
}
|
||||
ino = in_f->ilf_ino;
|
||||
mp = log->l_mp;
|
||||
|
||||
/*
|
||||
* Inode buffers can be freed, look out for it,
|
||||
|
@ -2354,8 +2220,8 @@ xlog_recover_do_inode_trans(
|
|||
xfs_buf_relse(bp);
|
||||
xfs_fs_cmn_err(CE_ALERT, mp,
|
||||
"xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
|
||||
dip, bp, ino);
|
||||
XFS_ERROR_REPORT("xlog_recover_do_inode_trans(1)",
|
||||
dip, bp, in_f->ilf_ino);
|
||||
XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
|
||||
XFS_ERRLEVEL_LOW, mp);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
|
@ -2365,8 +2231,8 @@ xlog_recover_do_inode_trans(
|
|||
xfs_buf_relse(bp);
|
||||
xfs_fs_cmn_err(CE_ALERT, mp,
|
||||
"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld",
|
||||
item, ino);
|
||||
XFS_ERROR_REPORT("xlog_recover_do_inode_trans(2)",
|
||||
item, in_f->ilf_ino);
|
||||
XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
|
||||
XFS_ERRLEVEL_LOW, mp);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
|
@ -2394,12 +2260,12 @@ xlog_recover_do_inode_trans(
|
|||
if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
|
||||
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
|
||||
(dicp->di_format != XFS_DINODE_FMT_BTREE)) {
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(3)",
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_fs_cmn_err(CE_ALERT, mp,
|
||||
"xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
|
||||
item, dip, bp, ino);
|
||||
item, dip, bp, in_f->ilf_ino);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
}
|
||||
|
@ -2407,40 +2273,40 @@ xlog_recover_do_inode_trans(
|
|||
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
|
||||
(dicp->di_format != XFS_DINODE_FMT_BTREE) &&
|
||||
(dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(4)",
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_fs_cmn_err(CE_ALERT, mp,
|
||||
"xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
|
||||
item, dip, bp, ino);
|
||||
item, dip, bp, in_f->ilf_ino);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(5)",
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_fs_cmn_err(CE_ALERT, mp,
|
||||
"xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
|
||||
item, dip, bp, ino,
|
||||
item, dip, bp, in_f->ilf_ino,
|
||||
dicp->di_nextents + dicp->di_anextents,
|
||||
dicp->di_nblocks);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
}
|
||||
if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(6)",
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_fs_cmn_err(CE_ALERT, mp,
|
||||
"xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x",
|
||||
item, dip, bp, ino, dicp->di_forkoff);
|
||||
item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
|
||||
error = EFSCORRUPTED;
|
||||
goto error;
|
||||
}
|
||||
if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
|
||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
|
||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||
xfs_buf_relse(bp);
|
||||
xfs_fs_cmn_err(CE_ALERT, mp,
|
||||
|
@ -2532,7 +2398,7 @@ xlog_recover_do_inode_trans(
|
|||
break;
|
||||
|
||||
default:
|
||||
xlog_warn("XFS: xlog_recover_do_inode_trans: Invalid flag");
|
||||
xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag");
|
||||
ASSERT(0);
|
||||
xfs_buf_relse(bp);
|
||||
error = EIO;
|
||||
|
@ -2556,18 +2422,11 @@ xlog_recover_do_inode_trans(
|
|||
* of that type.
|
||||
*/
|
||||
STATIC int
|
||||
xlog_recover_do_quotaoff_trans(
|
||||
xlog_recover_quotaoff_pass1(
|
||||
xlog_t *log,
|
||||
xlog_recover_item_t *item,
|
||||
int pass)
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_qoff_logformat_t *qoff_f;
|
||||
|
||||
if (pass == XLOG_RECOVER_PASS2) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
qoff_f = item->ri_buf[0].i_addr;
|
||||
xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr;
|
||||
ASSERT(qoff_f);
|
||||
|
||||
/*
|
||||
|
@ -2588,22 +2447,17 @@ xlog_recover_do_quotaoff_trans(
|
|||
* Recover a dquot record
|
||||
*/
|
||||
STATIC int
|
||||
xlog_recover_do_dquot_trans(
|
||||
xlog_recover_dquot_pass2(
|
||||
xlog_t *log,
|
||||
xlog_recover_item_t *item,
|
||||
int pass)
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_mount_t *mp;
|
||||
xfs_mount_t *mp = log->l_mp;
|
||||
xfs_buf_t *bp;
|
||||
struct xfs_disk_dquot *ddq, *recddq;
|
||||
int error;
|
||||
xfs_dq_logformat_t *dq_f;
|
||||
uint type;
|
||||
|
||||
if (pass == XLOG_RECOVER_PASS1) {
|
||||
return 0;
|
||||
}
|
||||
mp = log->l_mp;
|
||||
|
||||
/*
|
||||
* Filesystems are required to send in quota flags at mount time.
|
||||
|
@ -2647,7 +2501,7 @@ xlog_recover_do_dquot_trans(
|
|||
if ((error = xfs_qm_dqcheck(recddq,
|
||||
dq_f->qlf_id,
|
||||
0, XFS_QMOPT_DOWARN,
|
||||
"xlog_recover_do_dquot_trans (log copy)"))) {
|
||||
"xlog_recover_dquot_pass2 (log copy)"))) {
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
ASSERT(dq_f->qlf_len == 1);
|
||||
|
@ -2670,7 +2524,7 @@ xlog_recover_do_dquot_trans(
|
|||
* minimal initialization then.
|
||||
*/
|
||||
if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
|
||||
"xlog_recover_do_dquot_trans")) {
|
||||
"xlog_recover_dquot_pass2")) {
|
||||
xfs_buf_relse(bp);
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
|
@ -2693,38 +2547,31 @@ xlog_recover_do_dquot_trans(
|
|||
* LSN.
|
||||
*/
|
||||
STATIC int
|
||||
xlog_recover_do_efi_trans(
|
||||
xlog_recover_efi_pass2(
|
||||
xlog_t *log,
|
||||
xlog_recover_item_t *item,
|
||||
xfs_lsn_t lsn,
|
||||
int pass)
|
||||
xfs_lsn_t lsn)
|
||||
{
|
||||
int error;
|
||||
xfs_mount_t *mp;
|
||||
xfs_mount_t *mp = log->l_mp;
|
||||
xfs_efi_log_item_t *efip;
|
||||
xfs_efi_log_format_t *efi_formatp;
|
||||
|
||||
if (pass == XLOG_RECOVER_PASS1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
efi_formatp = item->ri_buf[0].i_addr;
|
||||
|
||||
mp = log->l_mp;
|
||||
efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
|
||||
if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
|
||||
&(efip->efi_format)))) {
|
||||
xfs_efi_item_free(efip);
|
||||
return error;
|
||||
}
|
||||
efip->efi_next_extent = efi_formatp->efi_nextents;
|
||||
efip->efi_flags |= XFS_EFI_COMMITTED;
|
||||
atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
|
||||
|
||||
spin_lock(&log->l_ailp->xa_lock);
|
||||
/*
|
||||
* xfs_trans_ail_update() drops the AIL lock.
|
||||
*/
|
||||
xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
|
||||
xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2737,11 +2584,10 @@ xlog_recover_do_efi_trans(
|
|||
* efd format structure. If we find it, we remove the efi from the
|
||||
* AIL and free it.
|
||||
*/
|
||||
STATIC void
|
||||
xlog_recover_do_efd_trans(
|
||||
STATIC int
|
||||
xlog_recover_efd_pass2(
|
||||
xlog_t *log,
|
||||
xlog_recover_item_t *item,
|
||||
int pass)
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
xfs_efd_log_format_t *efd_formatp;
|
||||
xfs_efi_log_item_t *efip = NULL;
|
||||
|
@ -2750,10 +2596,6 @@ xlog_recover_do_efd_trans(
|
|||
struct xfs_ail_cursor cur;
|
||||
struct xfs_ail *ailp = log->l_ailp;
|
||||
|
||||
if (pass == XLOG_RECOVER_PASS1) {
|
||||
return;
|
||||
}
|
||||
|
||||
efd_formatp = item->ri_buf[0].i_addr;
|
||||
ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
|
||||
((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
|
||||
|
@ -2785,62 +2627,6 @@ xlog_recover_do_efd_trans(
|
|||
}
|
||||
xfs_trans_ail_cursor_done(ailp, &cur);
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform the transaction
|
||||
*
|
||||
* If the transaction modifies a buffer or inode, do it now. Otherwise,
|
||||
* EFIs and EFDs get queued up by adding entries into the AIL for them.
|
||||
*/
|
||||
STATIC int
|
||||
xlog_recover_do_trans(
|
||||
xlog_t *log,
|
||||
xlog_recover_t *trans,
|
||||
int pass)
|
||||
{
|
||||
int error = 0;
|
||||
xlog_recover_item_t *item;
|
||||
|
||||
error = xlog_recover_reorder_trans(log, trans, pass);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
list_for_each_entry(item, &trans->r_itemq, ri_list) {
|
||||
trace_xfs_log_recover_item_recover(log, trans, item, pass);
|
||||
switch (ITEM_TYPE(item)) {
|
||||
case XFS_LI_BUF:
|
||||
error = xlog_recover_do_buffer_trans(log, item, pass);
|
||||
break;
|
||||
case XFS_LI_INODE:
|
||||
error = xlog_recover_do_inode_trans(log, item, pass);
|
||||
break;
|
||||
case XFS_LI_EFI:
|
||||
error = xlog_recover_do_efi_trans(log, item,
|
||||
trans->r_lsn, pass);
|
||||
break;
|
||||
case XFS_LI_EFD:
|
||||
xlog_recover_do_efd_trans(log, item, pass);
|
||||
error = 0;
|
||||
break;
|
||||
case XFS_LI_DQUOT:
|
||||
error = xlog_recover_do_dquot_trans(log, item, pass);
|
||||
break;
|
||||
case XFS_LI_QUOTAOFF:
|
||||
error = xlog_recover_do_quotaoff_trans(log, item,
|
||||
pass);
|
||||
break;
|
||||
default:
|
||||
xlog_warn(
|
||||
"XFS: invalid item type (%d) xlog_recover_do_trans", ITEM_TYPE(item));
|
||||
ASSERT(0);
|
||||
error = XFS_ERROR(EIO);
|
||||
break;
|
||||
}
|
||||
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2852,7 +2638,7 @@ xlog_recover_do_trans(
|
|||
*/
|
||||
STATIC void
|
||||
xlog_recover_free_trans(
|
||||
xlog_recover_t *trans)
|
||||
struct xlog_recover *trans)
|
||||
{
|
||||
xlog_recover_item_t *item, *n;
|
||||
int i;
|
||||
|
@ -2870,18 +2656,96 @@ xlog_recover_free_trans(
|
|||
kmem_free(trans);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xlog_recover_commit_pass1(
|
||||
struct log *log,
|
||||
struct xlog_recover *trans,
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
|
||||
|
||||
switch (ITEM_TYPE(item)) {
|
||||
case XFS_LI_BUF:
|
||||
return xlog_recover_buffer_pass1(log, item);
|
||||
case XFS_LI_QUOTAOFF:
|
||||
return xlog_recover_quotaoff_pass1(log, item);
|
||||
case XFS_LI_INODE:
|
||||
case XFS_LI_EFI:
|
||||
case XFS_LI_EFD:
|
||||
case XFS_LI_DQUOT:
|
||||
/* nothing to do in pass 1 */
|
||||
return 0;
|
||||
default:
|
||||
xlog_warn(
|
||||
"XFS: invalid item type (%d) xlog_recover_commit_pass1",
|
||||
ITEM_TYPE(item));
|
||||
ASSERT(0);
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xlog_recover_commit_pass2(
|
||||
struct log *log,
|
||||
struct xlog_recover *trans,
|
||||
xlog_recover_item_t *item)
|
||||
{
|
||||
trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
|
||||
|
||||
switch (ITEM_TYPE(item)) {
|
||||
case XFS_LI_BUF:
|
||||
return xlog_recover_buffer_pass2(log, item);
|
||||
case XFS_LI_INODE:
|
||||
return xlog_recover_inode_pass2(log, item);
|
||||
case XFS_LI_EFI:
|
||||
return xlog_recover_efi_pass2(log, item, trans->r_lsn);
|
||||
case XFS_LI_EFD:
|
||||
return xlog_recover_efd_pass2(log, item);
|
||||
case XFS_LI_DQUOT:
|
||||
return xlog_recover_dquot_pass2(log, item);
|
||||
case XFS_LI_QUOTAOFF:
|
||||
/* nothing to do in pass2 */
|
||||
return 0;
|
||||
default:
|
||||
xlog_warn(
|
||||
"XFS: invalid item type (%d) xlog_recover_commit_pass2",
|
||||
ITEM_TYPE(item));
|
||||
ASSERT(0);
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform the transaction.
|
||||
*
|
||||
* If the transaction modifies a buffer or inode, do it now. Otherwise,
|
||||
* EFIs and EFDs get queued up by adding entries into the AIL for them.
|
||||
*/
|
||||
STATIC int
|
||||
xlog_recover_commit_trans(
|
||||
xlog_t *log,
|
||||
xlog_recover_t *trans,
|
||||
struct log *log,
|
||||
struct xlog_recover *trans,
|
||||
int pass)
|
||||
{
|
||||
int error;
|
||||
int error = 0;
|
||||
xlog_recover_item_t *item;
|
||||
|
||||
hlist_del(&trans->r_list);
|
||||
if ((error = xlog_recover_do_trans(log, trans, pass)))
|
||||
|
||||
error = xlog_recover_reorder_trans(log, trans, pass);
|
||||
if (error)
|
||||
return error;
|
||||
xlog_recover_free_trans(trans); /* no error */
|
||||
|
||||
list_for_each_entry(item, &trans->r_itemq, ri_list) {
|
||||
if (pass == XLOG_RECOVER_PASS1)
|
||||
error = xlog_recover_commit_pass1(log, trans, item);
|
||||
else
|
||||
error = xlog_recover_commit_pass2(log, trans, item);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
xlog_recover_free_trans(trans);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3011,7 +2875,7 @@ xlog_recover_process_efi(
|
|||
xfs_extent_t *extp;
|
||||
xfs_fsblock_t startblock_fsb;
|
||||
|
||||
ASSERT(!(efip->efi_flags & XFS_EFI_RECOVERED));
|
||||
ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
|
||||
|
||||
/*
|
||||
* First check the validity of the extents described by the
|
||||
|
@ -3050,7 +2914,7 @@ xlog_recover_process_efi(
|
|||
extp->ext_len);
|
||||
}
|
||||
|
||||
efip->efi_flags |= XFS_EFI_RECOVERED;
|
||||
set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
|
||||
error = xfs_trans_commit(tp, 0);
|
||||
return error;
|
||||
|
||||
|
@ -3107,7 +2971,7 @@ xlog_recover_process_efis(
|
|||
* Skip EFIs that we've already processed.
|
||||
*/
|
||||
efip = (xfs_efi_log_item_t *)lip;
|
||||
if (efip->efi_flags & XFS_EFI_RECOVERED) {
|
||||
if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
|
||||
lip = xfs_trans_ail_cursor_next(ailp, &cur);
|
||||
continue;
|
||||
}
|
||||
|
@ -3724,7 +3588,7 @@ xlog_do_log_recovery(
|
|||
xfs_daddr_t head_blk,
|
||||
xfs_daddr_t tail_blk)
|
||||
{
|
||||
int error;
|
||||
int error, i;
|
||||
|
||||
ASSERT(head_blk != tail_blk);
|
||||
|
||||
|
@ -3732,10 +3596,12 @@ xlog_do_log_recovery(
|
|||
* First do a pass to find all of the cancelled buf log items.
|
||||
* Store them in the buf_cancel_table for use in the second pass.
|
||||
*/
|
||||
log->l_buf_cancel_table =
|
||||
(xfs_buf_cancel_t **)kmem_zalloc(XLOG_BC_TABLE_SIZE *
|
||||
sizeof(xfs_buf_cancel_t*),
|
||||
log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
|
||||
sizeof(struct list_head),
|
||||
KM_SLEEP);
|
||||
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
|
||||
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
|
||||
|
||||
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
|
||||
XLOG_RECOVER_PASS1);
|
||||
if (error != 0) {
|
||||
|
@ -3754,7 +3620,7 @@ xlog_do_log_recovery(
|
|||
int i;
|
||||
|
||||
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
|
||||
ASSERT(log->l_buf_cancel_table[i] == NULL);
|
||||
ASSERT(list_empty(&log->l_buf_cancel_table[i]));
|
||||
}
|
||||
#endif /* DEBUG */
|
||||
|
||||
|
|
|
@ -472,7 +472,7 @@ xfs_initialize_perag(
|
|||
goto out_unwind;
|
||||
pag->pag_agno = index;
|
||||
pag->pag_mount = mp;
|
||||
rwlock_init(&pag->pag_ici_lock);
|
||||
spin_lock_init(&pag->pag_ici_lock);
|
||||
mutex_init(&pag->pag_ici_reclaim_lock);
|
||||
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
|
||||
spin_lock_init(&pag->pag_buf_lock);
|
||||
|
@ -974,6 +974,24 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
|
|||
mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
|
||||
}
|
||||
|
||||
/*
|
||||
* precalculate the low space thresholds for dynamic speculative preallocation.
|
||||
*/
|
||||
void
|
||||
xfs_set_low_space_thresholds(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < XFS_LOWSP_MAX; i++) {
|
||||
__uint64_t space = mp->m_sb.sb_dblocks;
|
||||
|
||||
do_div(space, 100);
|
||||
mp->m_low_space[i] = space * (i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Set whether we're using inode alignment.
|
||||
*/
|
||||
|
@ -1196,6 +1214,9 @@ xfs_mountfs(
|
|||
*/
|
||||
xfs_set_rw_sizes(mp);
|
||||
|
||||
/* set the low space thresholds for dynamic preallocation */
|
||||
xfs_set_low_space_thresholds(mp);
|
||||
|
||||
/*
|
||||
* Set the inode cluster size.
|
||||
* This may still be overridden by the file system
|
||||
|
|
|
@ -103,6 +103,16 @@ extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
|
|||
xfs_mod_incore_sb(mp, field, delta, rsvd)
|
||||
#endif
|
||||
|
||||
/*
 * Dynamic preallocation free space thresholds.  Each constant names one
 * threshold, from 1% (value 0) up to 5% (value 4); XFS_LOWSP_MAX is the
 * number of thresholds.
 */
enum {
	XFS_LOWSP_1_PCNT	= 0,
	XFS_LOWSP_2_PCNT	= 1,
	XFS_LOWSP_3_PCNT	= 2,
	XFS_LOWSP_4_PCNT	= 3,
	XFS_LOWSP_5_PCNT	= 4,
	XFS_LOWSP_MAX		= 5,
};
|
||||
|
||||
typedef struct xfs_mount {
|
||||
struct super_block *m_super;
|
||||
xfs_tid_t m_tid; /* next unused tid for fs */
|
||||
|
@ -202,6 +212,8 @@ typedef struct xfs_mount {
|
|||
__int64_t m_update_flags; /* sb flags we need to update
|
||||
on the next remount,rw */
|
||||
struct shrinker m_inode_shrink; /* inode reclaim shrinker */
|
||||
int64_t m_low_space[XFS_LOWSP_MAX];
|
||||
/* low free space thresholds */
|
||||
} xfs_mount_t;
|
||||
|
||||
/*
|
||||
|
@ -379,6 +391,8 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
|
|||
|
||||
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
|
||||
|
||||
extern void xfs_set_low_space_thresholds(struct xfs_mount *);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
|
||||
|
|
|
@ -1350,7 +1350,7 @@ xfs_trans_fill_vecs(
|
|||
* they could be immediately flushed and we'd have to race with the flusher
|
||||
* trying to pull the item from the AIL as we add it.
|
||||
*/
|
||||
void
|
||||
static void
|
||||
xfs_trans_item_committed(
|
||||
struct xfs_log_item *lip,
|
||||
xfs_lsn_t commit_lsn,
|
||||
|
@ -1425,6 +1425,83 @@ xfs_trans_committed(
|
|||
xfs_trans_free(tp);
|
||||
}
|
||||
|
||||
static inline void
|
||||
xfs_log_item_batch_insert(
|
||||
struct xfs_ail *ailp,
|
||||
struct xfs_log_item **log_items,
|
||||
int nr_items,
|
||||
xfs_lsn_t commit_lsn)
|
||||
{
|
||||
int i;
|
||||
|
||||
spin_lock(&ailp->xa_lock);
|
||||
/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
|
||||
xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
|
||||
|
||||
for (i = 0; i < nr_items; i++)
|
||||
IOP_UNPIN(log_items[i], 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Bulk operation version of xfs_trans_committed that takes a log vector of
|
||||
* items to insert into the AIL. This uses bulk AIL insertion techniques to
|
||||
* minimise lock traffic.
|
||||
*/
|
||||
void
|
||||
xfs_trans_committed_bulk(
|
||||
struct xfs_ail *ailp,
|
||||
struct xfs_log_vec *log_vector,
|
||||
xfs_lsn_t commit_lsn,
|
||||
int aborted)
|
||||
{
|
||||
#define LOG_ITEM_BATCH_SIZE 32
|
||||
struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
|
||||
struct xfs_log_vec *lv;
|
||||
int i = 0;
|
||||
|
||||
/* unpin all the log items */
|
||||
for (lv = log_vector; lv; lv = lv->lv_next ) {
|
||||
struct xfs_log_item *lip = lv->lv_item;
|
||||
xfs_lsn_t item_lsn;
|
||||
|
||||
if (aborted)
|
||||
lip->li_flags |= XFS_LI_ABORTED;
|
||||
item_lsn = IOP_COMMITTED(lip, commit_lsn);
|
||||
|
||||
/* item_lsn of -1 means the item was freed */
|
||||
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
|
||||
continue;
|
||||
|
||||
if (item_lsn != commit_lsn) {
|
||||
|
||||
/*
|
||||
* Not a bulk update option due to unusual item_lsn.
|
||||
* Push into AIL immediately, rechecking the lsn once
|
||||
* we have the ail lock. Then unpin the item.
|
||||
*/
|
||||
spin_lock(&ailp->xa_lock);
|
||||
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
|
||||
xfs_trans_ail_update(ailp, lip, item_lsn);
|
||||
else
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
IOP_UNPIN(lip, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Item is a candidate for bulk AIL insert. */
|
||||
log_items[i++] = lv->lv_item;
|
||||
if (i >= LOG_ITEM_BATCH_SIZE) {
|
||||
xfs_log_item_batch_insert(ailp, log_items,
|
||||
LOG_ITEM_BATCH_SIZE, commit_lsn);
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* make sure we insert the remainder! */
|
||||
if (i)
|
||||
xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the trans_commit code when we notice that
|
||||
* the filesystem is in the middle of a forced shutdown.
|
||||
|
|
|
@ -294,8 +294,8 @@ struct xfs_log_item_desc {
|
|||
#define XFS_ALLOC_BTREE_REF 2
|
||||
#define XFS_BMAP_BTREE_REF 2
|
||||
#define XFS_DIR_BTREE_REF 2
|
||||
#define XFS_INO_REF 2
|
||||
#define XFS_ATTR_BTREE_REF 1
|
||||
#define XFS_INO_REF 1
|
||||
#define XFS_DQUOT_REF 1
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
|
|
@ -28,8 +28,8 @@
|
|||
#include "xfs_trans_priv.h"
|
||||
#include "xfs_error.h"
|
||||
|
||||
STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *);
|
||||
STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
|
||||
STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t);
|
||||
STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
|
||||
STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
|
||||
STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
|
||||
|
||||
|
@ -449,129 +449,152 @@ xfs_trans_unlocked_item(
|
|||
xfs_log_move_tail(ailp->xa_mount, 1);
|
||||
} /* xfs_trans_unlocked_item */
|
||||
|
||||
|
||||
/*
|
||||
* Update the position of the item in the AIL with the new
|
||||
* lsn. If it is not yet in the AIL, add it. Otherwise, move
|
||||
* it to its new position by removing it and re-adding it.
|
||||
* xfs_trans_ail_update_bulk - bulk AIL insertion operation.
|
||||
*
|
||||
* Wakeup anyone with an lsn less than the item's lsn. If the item
|
||||
* we move in the AIL is the minimum one, update the tail lsn in the
|
||||
* log manager.
|
||||
* @xfs_trans_ail_update_bulk takes an array of log items that all need to be
|
||||
* positioned at the same LSN in the AIL. If an item is not in the AIL, it will
|
||||
* be added. Otherwise, it will be repositioned by removing it and re-adding
|
||||
* it to the AIL. If we move the first item in the AIL, update the log tail to
|
||||
* match the new minimum LSN in the AIL.
|
||||
*
|
||||
* This function must be called with the AIL lock held. The lock
|
||||
* is dropped before returning.
|
||||
* This function executes the update operations on all the items in the array
|
||||
* under a single hold of the AIL lock, which the caller takes before calling
|
||||
* it. Once the AIL lock is held, we need to check each log item LSN to
|
||||
* confirm it needs to be moved forward in the AIL.
|
||||
*
|
||||
* To optimise the insert operation, we delete all the items from the AIL in
|
||||
* the first pass, moving them into a temporary list, then splice the temporary
|
||||
* list into the correct position in the AIL. This avoids needing to do an
|
||||
* insert operation on every item.
|
||||
*
|
||||
* This function must be called with the AIL lock held. The lock is dropped
|
||||
* before returning.
|
||||
*/
|
||||
void
|
||||
xfs_trans_ail_update(
|
||||
struct xfs_ail *ailp,
|
||||
xfs_log_item_t *lip,
|
||||
xfs_lsn_t lsn) __releases(ailp->xa_lock)
|
||||
xfs_trans_ail_update_bulk(
|
||||
struct xfs_ail *ailp,
|
||||
struct xfs_log_item **log_items,
|
||||
int nr_items,
|
||||
xfs_lsn_t lsn) __releases(ailp->xa_lock)
|
||||
{
|
||||
xfs_log_item_t *dlip = NULL;
|
||||
xfs_log_item_t *mlip; /* ptr to minimum lip */
|
||||
xfs_log_item_t *mlip;
|
||||
xfs_lsn_t tail_lsn;
|
||||
int mlip_changed = 0;
|
||||
int i;
|
||||
LIST_HEAD(tmp);
|
||||
|
||||
mlip = xfs_ail_min(ailp);
|
||||
|
||||
if (lip->li_flags & XFS_LI_IN_AIL) {
|
||||
dlip = xfs_ail_delete(ailp, lip);
|
||||
ASSERT(dlip == lip);
|
||||
xfs_trans_ail_cursor_clear(ailp, dlip);
|
||||
} else {
|
||||
lip->li_flags |= XFS_LI_IN_AIL;
|
||||
}
|
||||
for (i = 0; i < nr_items; i++) {
|
||||
struct xfs_log_item *lip = log_items[i];
|
||||
if (lip->li_flags & XFS_LI_IN_AIL) {
|
||||
/* check if we really need to move the item */
|
||||
if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
|
||||
continue;
|
||||
|
||||
lip->li_lsn = lsn;
|
||||
xfs_ail_insert(ailp, lip);
|
||||
|
||||
if (mlip == dlip) {
|
||||
mlip = xfs_ail_min(ailp);
|
||||
/*
|
||||
* It is not safe to access mlip after the AIL lock is
|
||||
* dropped, so we must get a copy of li_lsn before we do
|
||||
* so. This is especially important on 32-bit platforms
|
||||
* where accessing and updating 64-bit values like li_lsn
|
||||
* is not atomic.
|
||||
*/
|
||||
tail_lsn = mlip->li_lsn;
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
xfs_log_move_tail(ailp->xa_mount, tail_lsn);
|
||||
} else {
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
}
|
||||
|
||||
|
||||
} /* xfs_trans_update_ail */
|
||||
|
||||
/*
|
||||
* Delete the given item from the AIL. It must already be in
|
||||
* the AIL.
|
||||
*
|
||||
* Wakeup anyone with an lsn less than item's lsn. If the item
|
||||
* we delete in the AIL is the minimum one, update the tail lsn in the
|
||||
* log manager.
|
||||
*
|
||||
* Clear the IN_AIL flag from the item, reset its lsn to 0, and
|
||||
* bump the AIL's generation count to indicate that the tree
|
||||
* has changed.
|
||||
*
|
||||
* This function must be called with the AIL lock held. The lock
|
||||
* is dropped before returning.
|
||||
*/
|
||||
void
|
||||
xfs_trans_ail_delete(
|
||||
struct xfs_ail *ailp,
|
||||
xfs_log_item_t *lip) __releases(ailp->xa_lock)
|
||||
{
|
||||
xfs_log_item_t *dlip;
|
||||
xfs_log_item_t *mlip;
|
||||
xfs_lsn_t tail_lsn;
|
||||
|
||||
if (lip->li_flags & XFS_LI_IN_AIL) {
|
||||
mlip = xfs_ail_min(ailp);
|
||||
dlip = xfs_ail_delete(ailp, lip);
|
||||
ASSERT(dlip == lip);
|
||||
xfs_trans_ail_cursor_clear(ailp, dlip);
|
||||
|
||||
|
||||
lip->li_flags &= ~XFS_LI_IN_AIL;
|
||||
lip->li_lsn = 0;
|
||||
|
||||
if (mlip == dlip) {
|
||||
mlip = xfs_ail_min(ailp);
|
||||
/*
|
||||
* It is not safe to access mlip after the AIL lock
|
||||
* is dropped, so we must get a copy of li_lsn
|
||||
* before we do so. This is especially important
|
||||
* on 32-bit platforms where accessing and updating
|
||||
* 64-bit values like li_lsn is not atomic.
|
||||
*/
|
||||
tail_lsn = mlip ? mlip->li_lsn : 0;
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
xfs_log_move_tail(ailp->xa_mount, tail_lsn);
|
||||
xfs_ail_delete(ailp, lip);
|
||||
if (mlip == lip)
|
||||
mlip_changed = 1;
|
||||
} else {
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
lip->li_flags |= XFS_LI_IN_AIL;
|
||||
}
|
||||
lip->li_lsn = lsn;
|
||||
list_add(&lip->li_ail, &tmp);
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* If the file system is not being shutdown, we are in
|
||||
* serious trouble if we get to this stage.
|
||||
*/
|
||||
struct xfs_mount *mp = ailp->xa_mount;
|
||||
|
||||
xfs_ail_splice(ailp, &tmp, lsn);
|
||||
|
||||
if (!mlip_changed) {
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
if (!XFS_FORCED_SHUTDOWN(mp)) {
|
||||
xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
|
||||
"%s: attempting to delete a log item that is not in the AIL",
|
||||
__func__);
|
||||
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is not safe to access mlip after the AIL lock is dropped, so we
|
||||
* must get a copy of li_lsn before we do so. This is especially
|
||||
* important on 32-bit platforms where accessing and updating 64-bit
|
||||
* values like li_lsn is not atomic.
|
||||
*/
|
||||
mlip = xfs_ail_min(ailp);
|
||||
tail_lsn = mlip->li_lsn;
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
xfs_log_move_tail(ailp->xa_mount, tail_lsn);
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_trans_ail_delete_bulk - remove multiple log items from the AIL
|
||||
*
|
||||
* @xfs_trans_ail_delete_bulk takes an array of log items that all need to be
|
||||
* removed from the AIL. The caller is already holding the AIL lock, and has done
|
||||
* all the checks necessary to ensure the items passed in via @log_items are
|
||||
* ready for deletion. This includes checking that the items are in the AIL.
|
||||
*
|
||||
* For each log item to be removed, unlink it from the AIL, clear the IN_AIL
|
||||
* flag from the item and reset the item's lsn to 0. If we remove the first
|
||||
* item in the AIL, update the log tail to match the new minimum LSN in the
|
||||
* AIL.
|
||||
*
|
||||
* This function will not drop the AIL lock until all items are removed from
|
||||
* the AIL to minimise the amount of lock traffic on the AIL. This does not
|
||||
* greatly increase the AIL hold time, but does significantly reduce the amount
|
||||
* of traffic on the lock, especially during IO completion.
|
||||
*
|
||||
* This function must be called with the AIL lock held. The lock is dropped
|
||||
* before returning.
|
||||
*/
|
||||
void
|
||||
xfs_trans_ail_delete_bulk(
|
||||
struct xfs_ail *ailp,
|
||||
struct xfs_log_item **log_items,
|
||||
int nr_items) __releases(ailp->xa_lock)
|
||||
{
|
||||
xfs_log_item_t *mlip;
|
||||
xfs_lsn_t tail_lsn;
|
||||
int mlip_changed = 0;
|
||||
int i;
|
||||
|
||||
mlip = xfs_ail_min(ailp);
|
||||
|
||||
for (i = 0; i < nr_items; i++) {
|
||||
struct xfs_log_item *lip = log_items[i];
|
||||
if (!(lip->li_flags & XFS_LI_IN_AIL)) {
|
||||
struct xfs_mount *mp = ailp->xa_mount;
|
||||
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
if (!XFS_FORCED_SHUTDOWN(mp)) {
|
||||
xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
|
||||
"%s: attempting to delete a log item that is not in the AIL",
|
||||
__func__);
|
||||
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
xfs_ail_delete(ailp, lip);
|
||||
lip->li_flags &= ~XFS_LI_IN_AIL;
|
||||
lip->li_lsn = 0;
|
||||
if (mlip == lip)
|
||||
mlip_changed = 1;
|
||||
}
|
||||
|
||||
if (!mlip_changed) {
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is not safe to access mlip after the AIL lock is dropped, so we
|
||||
* must get a copy of li_lsn before we do so. This is especially
|
||||
* important on 32-bit platforms where accessing and updating 64-bit
|
||||
* values like li_lsn is not atomic. It is possible we've emptied the
|
||||
* AIL here, so if that is the case, pass an LSN of 0 to the tail move.
|
||||
*/
|
||||
mlip = xfs_ail_min(ailp);
|
||||
tail_lsn = mlip ? mlip->li_lsn : 0;
|
||||
spin_unlock(&ailp->xa_lock);
|
||||
xfs_log_move_tail(ailp->xa_mount, tail_lsn);
|
||||
}
|
||||
|
||||
/*
|
||||
* The active item list (AIL) is a doubly linked list of log
|
||||
|
@ -623,16 +646,13 @@ xfs_trans_ail_destroy(
|
|||
}
|
||||
|
||||
/*
|
||||
* Insert the given log item into the AIL.
|
||||
* We almost always insert at the end of the list, so on inserts
|
||||
* we search from the end of the list to find where the
|
||||
* new item belongs.
|
||||
* splice the log item list into the AIL at the given LSN.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_ail_insert(
|
||||
xfs_ail_splice(
|
||||
struct xfs_ail *ailp,
|
||||
xfs_log_item_t *lip)
|
||||
/* ARGSUSED */
|
||||
struct list_head *list,
|
||||
xfs_lsn_t lsn)
|
||||
{
|
||||
xfs_log_item_t *next_lip;
|
||||
|
||||
|
@ -640,39 +660,33 @@ xfs_ail_insert(
|
|||
* If the list is empty, just insert the item.
|
||||
*/
|
||||
if (list_empty(&ailp->xa_ail)) {
|
||||
list_add(&lip->li_ail, &ailp->xa_ail);
|
||||
list_splice(list, &ailp->xa_ail);
|
||||
return;
|
||||
}
|
||||
|
||||
list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
|
||||
if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)
|
||||
if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
|
||||
(XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0));
|
||||
(XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
|
||||
|
||||
list_add(&lip->li_ail, &next_lip->li_ail);
|
||||
|
||||
xfs_ail_check(ailp, lip);
|
||||
list_splice_init(list, &next_lip->li_ail);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete the given item from the AIL. Return a pointer to the item.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
STATIC xfs_log_item_t *
|
||||
STATIC void
|
||||
xfs_ail_delete(
|
||||
struct xfs_ail *ailp,
|
||||
xfs_log_item_t *lip)
|
||||
/* ARGSUSED */
|
||||
{
|
||||
xfs_ail_check(ailp, lip);
|
||||
|
||||
list_del(&lip->li_ail);
|
||||
|
||||
return lip;
|
||||
xfs_trans_ail_cursor_clear(ailp, lip);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -682,7 +696,6 @@ xfs_ail_delete(
|
|||
STATIC xfs_log_item_t *
|
||||
xfs_ail_min(
|
||||
struct xfs_ail *ailp)
|
||||
/* ARGSUSED */
|
||||
{
|
||||
if (list_empty(&ailp->xa_ail))
|
||||
return NULL;
|
||||
|
@ -699,7 +712,6 @@ STATIC xfs_log_item_t *
|
|||
xfs_ail_next(
|
||||
struct xfs_ail *ailp,
|
||||
xfs_log_item_t *lip)
|
||||
/* ARGSUSED */
|
||||
{
|
||||
if (lip->li_ail.next == &ailp->xa_ail)
|
||||
return NULL;
|
||||
|
|
|
@ -69,12 +69,16 @@ xfs_trans_log_efi_extent(xfs_trans_t *tp,
|
|||
tp->t_flags |= XFS_TRANS_DIRTY;
|
||||
efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
|
||||
|
||||
next_extent = efip->efi_next_extent;
|
||||
/*
|
||||
* atomic_inc_return gives us the value after the increment;
|
||||
* we want to use it as an array index so we need to subtract 1 from
|
||||
* it.
|
||||
*/
|
||||
next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
|
||||
ASSERT(next_extent < efip->efi_format.efi_nextents);
|
||||
extp = &(efip->efi_format.efi_extents[next_extent]);
|
||||
extp->ext_start = start_block;
|
||||
extp->ext_len = ext_len;
|
||||
efip->efi_next_extent++;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -22,15 +22,17 @@ struct xfs_log_item;
|
|||
struct xfs_log_item_desc;
|
||||
struct xfs_mount;
|
||||
struct xfs_trans;
|
||||
struct xfs_ail;
|
||||
struct xfs_log_vec;
|
||||
|
||||
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
|
||||
void xfs_trans_del_item(struct xfs_log_item *);
|
||||
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
|
||||
int flags);
|
||||
void xfs_trans_item_committed(struct xfs_log_item *lip,
|
||||
xfs_lsn_t commit_lsn, int aborted);
|
||||
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
|
||||
|
||||
void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
|
||||
xfs_lsn_t commit_lsn, int aborted);
|
||||
/*
|
||||
* AIL traversal cursor.
|
||||
*
|
||||
|
@ -73,12 +75,29 @@ struct xfs_ail {
|
|||
/*
|
||||
* From xfs_trans_ail.c
|
||||
*/
|
||||
void xfs_trans_ail_update(struct xfs_ail *ailp,
|
||||
struct xfs_log_item *lip, xfs_lsn_t lsn)
|
||||
__releases(ailp->xa_lock);
|
||||
void xfs_trans_ail_delete(struct xfs_ail *ailp,
|
||||
struct xfs_log_item *lip)
|
||||
__releases(ailp->xa_lock);
|
||||
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
|
||||
struct xfs_log_item **log_items, int nr_items,
|
||||
xfs_lsn_t lsn) __releases(ailp->xa_lock);
|
||||
static inline void
|
||||
xfs_trans_ail_update(
|
||||
struct xfs_ail *ailp,
|
||||
struct xfs_log_item *lip,
|
||||
xfs_lsn_t lsn) __releases(ailp->xa_lock)
|
||||
{
|
||||
xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
|
||||
}
|
||||
|
||||
void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
|
||||
struct xfs_log_item **log_items, int nr_items)
|
||||
__releases(ailp->xa_lock);
|
||||
static inline void
|
||||
xfs_trans_ail_delete(
|
||||
struct xfs_ail *ailp,
|
||||
xfs_log_item_t *lip) __releases(ailp->xa_lock)
|
||||
{
|
||||
xfs_trans_ail_delete_bulk(ailp, &lip, 1);
|
||||
}
|
||||
|
||||
void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
|
||||
void xfs_trans_unlocked_item(struct xfs_ail *,
|
||||
xfs_log_item_t *);
|
||||
|
|
|
@ -964,29 +964,48 @@ xfs_release(
|
|||
xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
|
||||
}
|
||||
|
||||
if (ip->i_d.di_nlink != 0) {
|
||||
if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
|
||||
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
|
||||
ip->i_delayed_blks > 0)) &&
|
||||
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
|
||||
(!(ip->i_d.di_flags &
|
||||
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
|
||||
if (ip->i_d.di_nlink == 0)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If we can't get the iolock just skip truncating
|
||||
* the blocks past EOF because we could deadlock
|
||||
* with the mmap_sem otherwise. We'll get another
|
||||
* chance to drop them once the last reference to
|
||||
* the inode is dropped, so we'll never leak blocks
|
||||
* permanently.
|
||||
*/
|
||||
error = xfs_free_eofblocks(mp, ip,
|
||||
XFS_FREE_EOF_TRYLOCK);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
|
||||
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
|
||||
ip->i_delayed_blks > 0)) &&
|
||||
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
|
||||
(!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
|
||||
|
||||
/*
|
||||
* If we can't get the iolock just skip truncating the blocks
|
||||
* past EOF because we could deadlock with the mmap_sem
|
||||
* otherwise. We'll get another chance to drop them once the
|
||||
* last reference to the inode is dropped, so we'll never leak
|
||||
* blocks permanently.
|
||||
*
|
||||
* Further, if the inode is being opened, written and
|
||||
* closed frequently and we have delayed allocation blocks
|
||||
* outstanding (e.g. streaming writes from the NFS server),
|
||||
* truncating the blocks past EOF will cause fragmentation to
|
||||
* occur.
|
||||
*
|
||||
* In this case don't do the truncation, either, but we have to
|
||||
* be careful how we detect this case. Blocks beyond EOF show
|
||||
* up as i_delayed_blks even when the inode is clean, so we
|
||||
* need to truncate them away first before checking for a dirty
|
||||
* release. Hence on the first dirty close we will still remove
|
||||
* the speculative allocation, but after that we will leave it
|
||||
* in place.
|
||||
*/
|
||||
if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
|
||||
return 0;
|
||||
|
||||
error = xfs_free_eofblocks(mp, ip,
|
||||
XFS_FREE_EOF_TRYLOCK);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* delalloc blocks after truncation means it really is dirty */
|
||||
if (ip->i_delayed_blks)
|
||||
xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue