mirror of https://gitee.com/openkylin/linux.git
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (27 commits)
  ocfs2: Cache extent records
  ocfs2: Remember rw lock level during direct io
  ocfs2: Fix up i_blocks calculation to know about holes
  ocfs2: Fix extent lookup to return true size of holes
  ocfs2: Read from an unwritten extent returns zeros
  ocfs2: make room for unwritten extents flag
  ocfs2: Use own splice write actor
  ocfs2: Use do_sync_mapping_range() in ocfs2_zero_tail_for_truncate()
  [PATCH] Turn do_sync_file_range() into do_sync_mapping_range()
  ocfs2: zero tail of sparse files on truncate
  ocfs2: Teach ocfs2_get_block() about holes
  ocfs2: remove ocfs2_prepare_write() and ocfs2_commit_write()
  ocfs2: teach ocfs2_file_aio_write() about sparse files
  ocfs2: Turn off shared writeable mmap for local files systems with holes.
  ocfs2: abstract out allocation locking
  ocfs2: teach extend/truncate about sparse files
  ocfs2: temporarily remove extent map caching
  ocfs2: sparse b-tree support
  ocfs2: small cleanup of ocfs2_request_delete()
  ocfs2: remove unused code
  ...
commit ea6db58f3e
fs/ocfs2/alloc.c | 3081 (diff suppressed because it is too large)
Partial hunks for the extent allocation header (guard OCFS2_ALLOC_H): the ocfs2_insert_extent() prototype replaces its single blkno argument with a cpos/start_blk pair, declarations are added for ocfs2_zero_tail_for_truncate() and ocfs2_find_leaf() around struct ocfs2_truncate_context and ocfs2_commit_truncate(), and a new inline helper is added:

/*
 * Helper function to look at the # of clusters in an extent record.
 */
static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
					      struct ocfs2_extent_rec *rec)
{
	/*
	 * Cluster count in extent records is slightly different
	 * between interior nodes and leaf nodes. This is to support
	 * unwritten extents which need a flags field in leaf node
	 * records, thus shrinking the available space for a clusters
	 * field.
	 */
	if (el->l_tree_depth)
		return le32_to_cpu(rec->e_int_clusters);
	else
		return le16_to_cpu(rec->e_leaf_clusters);
}

#endif /* OCFS2_ALLOC_H */
fs/ocfs2/aops.c | 1011 (diff suppressed because it is too large)
Partial hunks for a header (guard OCFS2_FILE_H) covering the new buffered-write plumbing: a struct ocfs2_write_ctxt (write count, position, cluster position, large-page handling, filler callback and private data) and an ocfs2_page_writer callback type are introduced, together with ocfs2_buffered_write_cluster(), ocfs2_map_and_write_user_data() with struct ocfs2_buffered_write_priv (source buffer plus current iovec and offset), and ocfs2_map_and_write_splice_data() with struct ocfs2_splice_write_priv (splice descriptor, pipe buffer and offsets). The ocfs2_iocb_is_rw_locked()/ocfs2_iocb_clear_rw_locked() bit helpers on iocb->private remain, while ocfs2_iocb_set_rw_locked() becomes an inline function taking a level argument that is stored in bit 1 and read back with ocfs2_iocb_rw_locked_level().
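For readers unfamiliar with the iocb->private trick above, here is a minimal, self-contained sketch of the same bit-packing idea. The names and the plain unsigned long are illustrative stand-ins, not the kernel's struct kiocb or its set_bit()/test_bit() helpers:

#include <stdio.h>

/* Toy model of the iocb->private bit packing: bit 0 records that the rw
 * cluster lock is held for this I/O, bit 1 records the level it was taken
 * at (0 = read/PR, 1 = write/EX) so the completion path can drop it
 * correctly. "private" here is just a word, not a real struct kiocb. */
#define RW_LOCKED_BIT 0u
#define RW_LEVEL_BIT  1u

static void set_rw_locked(unsigned long *private, int level)
{
	*private |= 1ul << RW_LOCKED_BIT;
	if (level)
		*private |= 1ul << RW_LEVEL_BIT;
	else
		*private &= ~(1ul << RW_LEVEL_BIT);
}

static int is_rw_locked(unsigned long private)
{
	return (private >> RW_LOCKED_BIT) & 1u;
}

static int rw_locked_level(unsigned long private)
{
	return (private >> RW_LEVEL_BIT) & 1u;
}

int main(void)
{
	unsigned long private = 0;

	set_rw_locked(&private, 1);	/* e.g. a buffered write took the lock EX */
	printf("locked=%d level=%d\n", is_rw_locked(private), rw_locked_level(private));

	set_rw_locked(&private, 0);	/* a direct write would record level 0 instead */
	printf("locked=%d level=%d\n", is_rw_locked(private), rw_locked_level(private));
	return 0;
}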
Partial hunks for the cluster quorum code: an include hunk (kernel.h, slab.h, workqueue.h, reboot.h) and a change to o2quo_fence_self() touching the fencing path (o2hb_stop_all_regions() with panic("ocfs2 is very sorry to be fencing this system by panicing") on one side, printk() plus emergency_restart() on the other).
Partial hunks for the cluster network protocol header: the version history comment gains a "New in version 8: Replace delete inode votes with a cluster lock" entry, and O2NET_PROTOCOL_VERSION is bumped from 7ULL to 8ULL.
Partial hunks for the directory extend code: ocfs2_do_extend_dir() passes the current cluster count as a logical offset to ocfs2_do_extend_allocation(), computes the block to map via ocfs2_blocks_for_bytes(sb, i_size_read(dir)) and the new ocfs2_extent_map_get_blocks() signature, and ocfs2_extend_dir() sets dir->i_blocks with ocfs2_inode_sector_count() instead of ocfs2_align_bytes_to_sectors().
|
|||
|
||||
dlm_lockres_put(res);
|
||||
|
||||
cond_resched_lock(&dlm->spinlock);
|
||||
|
||||
if (dropped)
|
||||
goto redo_bucket;
|
||||
}
|
||||
cond_resched_lock(&dlm->spinlock);
|
||||
num += n;
|
||||
mlog(0, "%s: touched %d lockreses in bucket %d "
|
||||
"(tot=%d)\n", dlm->name, n, i, num);
|
||||
|
@ -1035,7 +1034,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
|
|||
{
|
||||
int status = 0, tmpstat, node;
|
||||
struct domain_join_ctxt *ctxt;
|
||||
enum dlm_query_join_response response;
|
||||
enum dlm_query_join_response response = JOIN_DISALLOW;
|
||||
|
||||
mlog_entry("%p", dlm);
|
||||
|
||||
|
|
|
Partial hunks for DLM recovery: dlm_remaster_locks() now takes dlm_reco_state_lock around the per-node state switch (spin_lock before the switch, spin_unlock after it).
Partial hunks for the cluster locking code and its header: a new ocfs2_inode_open_lops lock type is added and recognized by ocfs2_is_inode_lock(), initialized for OCFS2_LOCK_TYPE_OPEN in ocfs2_inode_lock_res_init(), and created in ocfs2_create_new_inode_locks(). Three new functions are introduced: ocfs2_open_lock() takes the open lock in PR mode, ocfs2_try_open_lock() tries it in PR or EX mode with LKM_NOQUEUE (so -EAGAIN tells the caller the inode is still in use on another node), and ocfs2_open_unlock() drops whatever PR/EX holders remain. ocfs2_refresh_inode_from_lvb() computes i_blocks with ocfs2_inode_sector_count(), ocfs2_meta_lock_update() is simplified around local mounts (the lockres is taken directly from the inode and the local-mount check moves to the top), and ocfs2_drop_inode_locks() also drops the new ip_open_lockres. The header hunk declares ocfs2_open_lock(), ocfs2_try_open_lock() and ocfs2_open_unlock().
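As a rough illustration of why a trylock can stand in for the old delete-inode vote, here is a small single-process sketch. A pthread rwlock plays the role of the cluster-wide open lock and EBUSY plays the role ocfs2 gives -EAGAIN; none of this is ocfs2 or DLM code, only an analogy under those assumptions:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

/* Toy model of the open-lock idea: a shared (read) hold marks an inode as
 * in use, and a non-blocking exclusive (write) acquisition at delete time
 * only succeeds once no holder remains anywhere. */
static pthread_rwlock_t open_lock = PTHREAD_RWLOCK_INITIALIZER;

static void node_opens_inode(void)
{
	pthread_rwlock_rdlock(&open_lock);	/* like ocfs2_open_lock(): PR mode */
}

static void node_closes_inode(void)
{
	pthread_rwlock_unlock(&open_lock);	/* like ocfs2_open_unlock() */
}

/* Returns 1 if the inode can be wiped, 0 if some holder still has it open. */
static int can_wipe_inode(void)
{
	int ret = pthread_rwlock_trywrlock(&open_lock); /* like ocfs2_try_open_lock(inode, 1) */

	if (ret == EBUSY)
		return 0;
	pthread_rwlock_unlock(&open_lock);
	return 1;
}

int main(void)
{
	node_opens_inode();
	printf("while open:  can_wipe=%d\n", can_wipe_inode()); /* prints 0 */
	node_closes_inode();
	printf("after close: can_wipe=%d\n", can_wipe_inode()); /* prints 1 */
	return 0;
}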
Another file's diff is suppressed because it is too large. Partial hunks follow for the extent map header (_EXTENT_MAP_H): the old interface (init_ocfs2_extent_maps(), ocfs2_extent_map_init(), ocfs2_extent_map_append(), ocfs2_extent_map_drop(), ocfs2_extent_map_trunc() and the counted ocfs2_extent_map_get_blocks()) gives way to a small per-inode cache: struct ocfs2_extent_map_item (cpos, phys, clusters, flags, list head), struct ocfs2_extent_map holding up to OCFS2_MAX_EXTENT_MAP_ITEMS (3) of them, void-returning ocfs2_extent_map_init()/ocfs2_extent_map_trunc(), ocfs2_extent_map_insert_rec(), ocfs2_get_clusters(), and an ocfs2_extent_map_get_blocks() that also returns per-extent flags.
fs/ocfs2/file.c | 633
fs/ocfs2/file.c, partial hunks (first part): <linux/writeback.h> joins the includes; ocfs2_set_inode_size() computes i_blocks with ocfs2_inode_sector_count(); ocfs2_orphan_for_truncate() journals the inode buffer, calls ocfs2_zero_tail_for_truncate() before updating i_size, and writes the new size and ctime/mtime into the on-disk dinode; ocfs2_truncate_file() loses its simple-truncate fast path and keeps the data lock held until after ocfs2_commit_truncate() (new bail_unlock_data label). ocfs2_do_extend_allocation() gains a logical_offset parameter that is passed to ocfs2_insert_extent() along with the physical block and advanced as clusters are added. A new ocfs2_lock_allocators() helper reserves metadata and data allocators in one place, reserving new metadata when there are no free extent slots or, on sparse-capable file systems, fewer free slots than clusters_to_add, because the allocation later happens inside a journal handle where re-taking a cluster lock would violate ordering. ocfs2_extend_allocation() is rewritten around that helper, tracks a logical_start, and BUGs on sparse file systems, while ocfs2_extend_file() skips cluster allocation entirely when sparse allocation is enabled.
fs/ocfs2/file.c, partial hunks (continued): ocfs2_permission() no longer logs -ENOENT from the meta lock; a new ocfs2_check_range_for_holes() walks ocfs2_get_clusters() over the clusters covering [pos, pos + count) and reports whether any of them is a hole or an unwritten extent; ocfs2_prepare_inode_for_write() gains appending and direct_io parameters and, on sparse file systems, clears *direct_io when the write would extend i_size or the range contains holes, so the caller falls back to buffered I/O. ocfs2_file_aio_write() is rewritten: it validates the iovec itself (ocfs2_check_iovec()), takes i_mutex and, for O_DIRECT, i_alloc_sem, records the rw lock level with ocfs2_iocb_set_rw_locked(iocb, rw_level), and dispatches either to generic_file_direct_write() or to the new ocfs2_file_buffered_write() (which walks the iovec with ocfs2_set_next_iovec()/ocfs2_get_write_source() and calls ocfs2_buffered_write_cluster() with ocfs2_map_and_write_user_data), re-locking as buffered, synchronous I/O when direct I/O cannot be used and syncing the written range with sync_page_range_nolock() for O_SYNC writes. A new ocfs2_splice_write_actor() plus __ocfs2_file_splice_write() replace generic_file_splice_write_nolock() in ocfs2_file_splice_write(), copying up to a page at a time through ocfs2_buffered_write_cluster() with ocfs2_map_and_write_splice_data. ocfs2_file_aio_read() also records the rw lock level. A related header hunk updates the ocfs2_do_extend_allocation() prototype (new cluster_start argument) and declares ocfs2_lock_allocators().
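The hole check above is easy to model outside the kernel. The following is a minimal, self-contained sketch of the same walk over a toy in-memory extent map; the cluster size, structure names and the DEMO_UNWRITTEN flag are made up for the example, and the linear scan stands in for ocfs2_get_clusters():

#include <stdint.h>
#include <stdio.h>

#define DEMO_CLUSTER_BITS 20u		/* pretend clusters are 1 MiB */
#define DEMO_UNWRITTEN    0x01u		/* stand-in for an unwritten-extent flag */

/* Toy in-memory extent map: each entry maps a run of logical clusters. */
struct demo_extent {
	uint32_t cpos;		/* logical start cluster */
	uint32_t clusters;	/* length in clusters */
	uint32_t phys;		/* physical start cluster, 0 == hole */
	uint32_t flags;
};

static const struct demo_extent map[] = {
	{ .cpos = 0, .clusters = 4, .phys = 100, .flags = 0 },
	{ .cpos = 4, .clusters = 2, .phys = 0,   .flags = 0 },			/* hole */
	{ .cpos = 6, .clusters = 3, .phys = 200, .flags = DEMO_UNWRITTEN },
};

/* Mirrors the shape of the hole check: walk the clusters covering
 * [pos, pos + count) and return 1 if any of them is a hole or an
 * unwritten extent, 0 otherwise. */
static int demo_range_has_holes(uint64_t pos, uint64_t count)
{
	uint32_t cpos = (uint32_t)(pos >> DEMO_CLUSTER_BITS);
	uint32_t end  = (uint32_t)((pos + count + (1u << DEMO_CLUSTER_BITS) - 1)
				   >> DEMO_CLUSTER_BITS);

	while (cpos < end) {
		const struct demo_extent *e = NULL;

		for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); i++)
			if (cpos >= map[i].cpos && cpos < map[i].cpos + map[i].clusters)
				e = &map[i];
		if (!e || e->phys == 0 || (e->flags & DEMO_UNWRITTEN))
			return 1;

		cpos = e->cpos + e->clusters;	/* skip the rest of this extent */
	}
	return 0;
}

int main(void)
{
	printf("first 3 MiB:          holes/unwritten = %d\n",
	       demo_range_has_holes(0, 3ull << DEMO_CLUSTER_BITS));	/* 0 */
	printf("range over cluster 5: holes/unwritten = %d\n",
	       demo_range_has_holes(5ull << DEMO_CLUSTER_BITS, 1024));	/* 1 */
	return 0;
}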
fs/ocfs2/inode.c | 203
fs/ocfs2/inode.c, partial hunks: ocfs2_ilookup_for_vote() and the OCFS2_FI_FLAG_NOWAIT/OCFS2_FI_FLAG_DELETE handling in ocfs2_find_actor() are removed. ocfs2_populate_inode() sets ip_clusters and ip_attr earlier, computes i_blocks with ocfs2_inode_sector_count(), and initializes the new OCFS2_LOCK_TYPE_OPEN lock resource; ocfs2_read_locked_inode() also excludes orphan recovery from can_lock, initializes ip_open_lockres, takes ocfs2_open_lock() before the meta lock when it can lock, and uses ocfs2_try_open_lock() during orphan recovery. ocfs2_truncate_for_delete() is reorganized under an fe->i_clusters check, journaling the inode buffer and zeroing i_size before preparing and committing the truncate. ocfs2_wipe_inode() reads the orphaned slot from the on-disk di->i_orphaned_slot rather than the in-memory ip_orphaned_slot, and ocfs2_query_inode_wipe() replaces the delete-inode vote with an exclusive ocfs2_try_open_lock(): as the added comment explains, every node holds a shared open lock from ocfs2_read_locked_inode(), so a successful exclusive trylock at delete time proves the inode is no longer live anywhere in the cluster, while -EAGAIN means it is still in use and the delete is skipped. ocfs2_clear_inode() calls ocfs2_open_unlock(), marks and frees ip_open_lockres, and truncates the extent map with ocfs2_extent_map_trunc(inode, 0); ocfs2_drop_inode() drops the stale ip_orphaned_slot comment; ocfs2_bread() uses the new ocfs2_extent_map_get_blocks() signature; and ocfs2_refresh_inode() adjusts its i_blocks handling for symlinks without clusters.
|
||||
inode->i_blocks = ocfs2_align_bytes_to_sectors(i_size_read(inode));
|
||||
inode->i_blocks = ocfs2_inode_sector_count(inode);
|
||||
inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
|
||||
inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
|
||||
inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
|
||||
|
|
|
@@ -26,6 +26,8 @@
#ifndef OCFS2_INODE_H
#define OCFS2_INODE_H

#include "extent_map.h"

/* OCFS2 Inode Private Data */
struct ocfs2_inode_info
{

@@ -34,6 +36,7 @@ struct ocfs2_inode_info
struct ocfs2_lock_res ip_rw_lockres;
struct ocfs2_lock_res ip_meta_lockres;
struct ocfs2_lock_res ip_data_lockres;
struct ocfs2_lock_res ip_open_lockres;

/* protects allocation changes on this inode. */
struct rw_semaphore ip_alloc_sem;

@@ -42,9 +45,7 @@ struct ocfs2_inode_info
spinlock_t ip_lock;
u32 ip_open_count;
u32 ip_clusters;
struct ocfs2_extent_map ip_map;
struct list_head ip_io_markers;
int ip_orphaned_slot;

struct mutex ip_io_mutex;

@@ -64,6 +65,8 @@ struct ocfs2_inode_info

struct ocfs2_caching_info ip_metadata_cache;

struct ocfs2_extent_map ip_extent_map;

struct inode vfs_inode;
};

@@ -117,14 +120,9 @@ void ocfs2_delete_inode(struct inode *inode);
void ocfs2_drop_inode(struct inode *inode);

/* Flags for ocfs2_iget() */
#define OCFS2_FI_FLAG_NOWAIT 0x1
#define OCFS2_FI_FLAG_DELETE 0x2
#define OCFS2_FI_FLAG_SYSFILE 0x4
#define OCFS2_FI_FLAG_NOLOCK 0x8
#define OCFS2_FI_FLAG_SYSFILE 0x4
#define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x8
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
u64 blkno,
int delete_vote);
int ocfs2_inode_init_private(struct inode *inode);
int ocfs2_inode_revalidate(struct dentry *dentry);
int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,

@@ -144,4 +142,11 @@ int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);

void ocfs2_set_inode_flags(struct inode *inode);

static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
{
int c_to_s_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits - 9;

return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits);
}

#endif /* OCFS2_INODE_H */
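The new ocfs2_inode_sector_count() helper above derives i_blocks from the number of allocated clusters rather than from i_size, which is what keeps i_blocks honest once files can contain holes. A minimal user-space sketch of the same arithmetic, using assumed values (64KB clusters, three allocated clusters) that are illustrations only and not taken from this merge:

#include <stdio.h>
#include <stdint.h>

/* Mirror of the i_blocks math above, outside the kernel. i_blocks is
 * counted in 512-byte sectors, hence the "- 9" in the shift. */
int main(void)
{
	unsigned int clustersize_bits = 16;	/* assumed: 64KB clusters */
	uint32_t ip_clusters = 3;		/* assumed: 3 clusters allocated */

	uint64_t sectors = (uint64_t)ip_clusters << (clustersize_bits - 9);

	printf("%u clusters of %u bytes -> i_blocks = %llu sectors (%llu bytes)\n",
	       ip_clusters, 1u << clustersize_bits,
	       (unsigned long long)sectors,
	       (unsigned long long)(sectors << 9));
	return 0;
}

With these inputs the helper reports 384 sectors (192KB), i.e. only the space actually allocated, regardless of how large i_size is.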
@@ -649,29 +649,20 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
static int ocfs2_force_read_journal(struct inode *inode)
{
int status = 0;
int i, p_blocks;
u64 v_blkno, p_blkno;
#define CONCURRENT_JOURNAL_FILL 32
int i;
u64 v_blkno, p_blkno, p_blocks, num_blocks;
#define CONCURRENT_JOURNAL_FILL 32ULL
struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];

mlog_entry_void();

BUG_ON(inode->i_blocks !=
ocfs2_align_bytes_to_sectors(i_size_read(inode)));

memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);

mlog(0, "Force reading %llu blocks\n",
(unsigned long long)(inode->i_blocks >>
(inode->i_sb->s_blocksize_bits - 9)));

num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);
v_blkno = 0;
while (v_blkno <
(inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9))) {

while (v_blkno < num_blocks) {
status = ocfs2_extent_map_get_blocks(inode, v_blkno,
1, &p_blkno,
&p_blocks);
&p_blkno, &p_blocks, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;

@@ -1306,7 +1297,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
continue;

iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
OCFS2_FI_FLAG_NOLOCK);
OCFS2_FI_FLAG_ORPHAN_RECOVERY);
if (IS_ERR(iter))
continue;

@@ -1418,7 +1409,6 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
/* Set the proper information to get us going into
* ocfs2_delete_inode. */
oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
oi->ip_orphaned_slot = slot;
spin_unlock(&oi->ip_lock);

iput(inode);
@@ -390,7 +390,7 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
/* We may be deleting metadata blocks, so metadata alloc dinode +
one desc. block for each possible delete. */
if (tree_depth && next_free == 1 &&
le32_to_cpu(last_el->l_recs[i].e_clusters) == clusters_to_del)
ocfs2_rec_clusters(last_el, &last_el->l_recs[i]) == clusters_to_del)
credits += 1 + tree_depth;

/* update to the truncate log. */
@@ -85,8 +85,11 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
int ret = 0, lock_level = 0;
struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb);

/* We don't want to support shared writable mappings yet. */
if (!ocfs2_mount_local(osb) &&
/*
* Only support shared writeable mmap for local mounts which
* don't know about holes.
*/
if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) &&
((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) &&
((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
@@ -175,8 +175,6 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,

inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
if (IS_ERR(inode)) {
mlog(ML_ERROR, "Unable to create inode %llu\n",
(unsigned long long)blkno);
ret = ERR_PTR(-EACCES);
goto bail_unlock;
}

@@ -189,7 +187,6 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
* unlink. */
spin_lock(&oi->ip_lock);
oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
oi->ip_orphaned_slot = OCFS2_INVALID_SLOT;
spin_unlock(&oi->ip_lock);

bail_add:

@@ -288,7 +285,7 @@ static int ocfs2_fill_new_dir(struct ocfs2_super *osb,

i_size_write(inode, inode->i_sb->s_blocksize);
inode->i_nlink = 2;
inode->i_blocks = ocfs2_align_bytes_to_sectors(inode->i_sb->s_blocksize);
inode->i_blocks = ocfs2_inode_sector_count(inode);
status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
if (status < 0) {
mlog_errno(status);

@@ -1486,8 +1483,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
struct buffer_head **bhs = NULL;
const char *c;
struct super_block *sb = osb->sb;
u64 p_blkno;
int p_blocks;
u64 p_blkno, p_blocks;
int virtual, blocks, status, i, bytes_left;

bytes_left = i_size_read(inode) + 1;

@@ -1514,8 +1510,8 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
goto bail;
}

status = ocfs2_extent_map_get_blocks(inode, 0, 1, &p_blkno,
&p_blocks);
status = ocfs2_extent_map_get_blocks(inode, 0, &p_blkno, &p_blocks,
NULL);
if (status < 0) {
mlog_errno(status);
goto bail;

@@ -1674,8 +1670,11 @@ static int ocfs2_symlink(struct inode *dir,
inode->i_rdev = 0;
newsize = l - 1;
if (l > ocfs2_fast_symlink_chars(sb)) {
u32 offset = 0;

inode->i_op = &ocfs2_symlink_inode_operations;
status = ocfs2_do_extend_allocation(osb, inode, 1, new_fe_bh,
status = ocfs2_do_extend_allocation(osb, inode, &offset, 1,
new_fe_bh,
handle, data_ac, NULL,
NULL);
if (status < 0) {

@@ -1689,7 +1688,7 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
i_size_write(inode, newsize);
inode->i_blocks = ocfs2_align_bytes_to_sectors(newsize);
inode->i_blocks = ocfs2_inode_sector_count(inode);
} else {
inode->i_op = &ocfs2_fast_symlink_inode_operations;
memcpy((char *) fe->id2.i_symlink, symname, l);

@@ -2222,9 +2221,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
/* Record which orphan dir our inode now resides
* in. delete_inode will use this to determine which orphan
* dir to lock. */
spin_lock(&OCFS2_I(inode)->ip_lock);
OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
spin_unlock(&OCFS2_I(inode)->ip_lock);
fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);

mlog(0, "Inode %llu orphaned in slot %d\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
@@ -46,11 +46,6 @@
#include "endian.h"
#include "ocfs2_lockid.h"

struct ocfs2_extent_map {
u32 em_clusters;
struct rb_root em_extents;
};

/* Most user visible OCFS2 inodes will have very few pieces of
* metadata, but larger files (including bitmaps, etc) must be taken
* into account when designing an access scheme. We allow a small

@@ -303,6 +298,13 @@ static inline int ocfs2_should_order_data(struct inode *inode)
return 1;
}

static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb)
{
if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
return 1;
return 0;
}

/* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock

@@ -461,6 +463,49 @@ static inline unsigned long ocfs2_align_bytes_to_sectors(u64 bytes)
return (unsigned long)((bytes + 511) >> 9);
}

static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb,
unsigned long pg_index)
{
u32 clusters = pg_index;
unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;

if (unlikely(PAGE_CACHE_SHIFT > cbits))
clusters = pg_index << (PAGE_CACHE_SHIFT - cbits);
else if (PAGE_CACHE_SHIFT < cbits)
clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT);

return clusters;
}

/*
* Find the 1st page index which covers the given clusters.
*/
static inline unsigned long ocfs2_align_clusters_to_page_index(struct super_block *sb,
u32 clusters)
{
unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
unsigned long index = clusters;

if (PAGE_CACHE_SHIFT > cbits) {
index = clusters >> (PAGE_CACHE_SHIFT - cbits);
} else if (PAGE_CACHE_SHIFT < cbits) {
index = clusters << (cbits - PAGE_CACHE_SHIFT);
}

return index;
}

static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
{
unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
unsigned int pages_per_cluster = 1;

if (PAGE_CACHE_SHIFT < cbits)
pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT);

return pages_per_cluster;
}

#define ocfs2_set_bit ext2_set_bit
#define ocfs2_clear_bit ext2_clear_bit
#define ocfs2_test_bit ext2_test_bit
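The three helpers above do nothing more than shift between a page-cache index and a cluster index, in whichever direction the two granularities differ. A small user-space sketch of the same conversions, with assumed constants (4KB pages, 32KB clusters) that are examples only and not mandated by ocfs2:

#include <stdio.h>

#define PAGE_SHIFT_EX	12u	/* assumed: 4KB pages */
#define CLUSTER_BITS_EX	15u	/* assumed: 32KB clusters */

/* Which cluster does this page index fall in? */
static unsigned int page_index_to_clusters(unsigned long pg_index)
{
	if (PAGE_SHIFT_EX > CLUSTER_BITS_EX)
		return pg_index << (PAGE_SHIFT_EX - CLUSTER_BITS_EX);
	return pg_index >> (CLUSTER_BITS_EX - PAGE_SHIFT_EX);
}

/* First page index covered by this cluster. */
static unsigned long clusters_to_page_index(unsigned int clusters)
{
	if (PAGE_SHIFT_EX > CLUSTER_BITS_EX)
		return clusters >> (PAGE_SHIFT_EX - CLUSTER_BITS_EX);
	return (unsigned long)clusters << (CLUSTER_BITS_EX - PAGE_SHIFT_EX);
}

int main(void)
{
	/* With 32KB clusters and 4KB pages, page 20 sits in cluster 2,
	 * and cluster 2 starts at page 16. */
	printf("page 20 -> cluster %u\n", page_index_to_clusters(20));
	printf("cluster 2 -> first page %lu\n", clusters_to_page_index(2));
	printf("pages per cluster: %u\n", 1u << (CLUSTER_BITS_EX - PAGE_SHIFT_EX));
	return 0;
}

Under these assumptions the program prints cluster 2, page 16 and 8 pages per cluster, which is the relationship the write paths rely on when zeroing whole clusters around a hole.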
@@ -86,7 +86,8 @@
OCFS2_SB(sb)->s_feature_incompat &= ~(mask)

#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
#define OCFS2_FEATURE_INCOMPAT_SUPP OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT
#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
#define OCFS2_FEATURE_RO_COMPAT_SUPP 0

/*

@@ -154,6 +155,12 @@
#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */
#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */

/*
* Extent record flags (e_node.leaf.flags)
*/
#define OCFS2_EXT_UNWRITTEN (0x01) /* Extent is allocated but
* unwritten */

/*
* ioctl commands
*/

@@ -282,10 +289,21 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
/*
* On disk extent record for OCFS2
* It describes a range of clusters on disk.
*
* Length fields are divided into interior and leaf node versions.
* This leaves room for a flags field (OCFS2_EXT_*) in the leaf nodes.
*/
struct ocfs2_extent_rec {
/*00*/ __le32 e_cpos; /* Offset into the file, in clusters */
__le32 e_clusters; /* Clusters covered by this extent */
union {
__le32 e_int_clusters; /* Clusters covered by all children */
struct {
__le16 e_leaf_clusters; /* Clusters covered by this
extent */
__u8 e_reserved1;
__u8 e_flags; /* Extent flags */
};
};
__le64 e_blkno; /* Physical disk offset, in blocks */
/*10*/
};

@@ -311,7 +329,10 @@ struct ocfs2_extent_list {
/*00*/ __le16 l_tree_depth; /* Extent tree depth from this
point. 0 means data extents
hang directly off this
header (a leaf) */
header (a leaf)
NOTE: The high 8 bits cannot be
used - tree_depth is never that big.
*/
__le16 l_count; /* Number of extent records */
__le16 l_next_free_rec; /* Next unused extent slot */
__le16 l_reserved1;

@@ -446,7 +467,9 @@ struct ocfs2_dinode {
__le32 i_ctime_nsec;
__le32 i_mtime_nsec;
__le32 i_attr;
__le32 i_reserved1;
__le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
was set in i_flags */
__le16 i_reserved1;
/*70*/ __le64 i_reserved2[8];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
@@ -44,6 +44,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_RENAME,
OCFS2_LOCK_TYPE_RW,
OCFS2_LOCK_TYPE_DENTRY,
OCFS2_LOCK_TYPE_OPEN,
OCFS2_NUM_LOCK_TYPES
};

@@ -69,6 +70,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_DENTRY:
c = 'N';
break;
case OCFS2_LOCK_TYPE_OPEN:
c = 'O';
break;
default:
c = '\0';
}

@@ -85,6 +89,7 @@ static char *ocfs2_lock_type_strings[] = {
* important job it does, anyway. */
[OCFS2_LOCK_TYPE_RW] = "Write/Read",
[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
[OCFS2_LOCK_TYPE_OPEN] = "Open",
};

static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
@@ -197,7 +197,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
goto bail;
}

status = ocfs2_extent_map_get_blocks(inode, 0ULL, 1, &blkno, NULL);
status = ocfs2_extent_map_get_blocks(inode, 0ULL, &blkno, NULL, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -381,8 +381,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
le32_to_cpu(fe->i_clusters)));
spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
alloc_inode->i_blocks =
ocfs2_align_bytes_to_sectors(i_size_read(alloc_inode));
alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);

status = 0;
bail:
@@ -806,9 +806,6 @@ static int __init ocfs2_init(void)

ocfs2_print_version();

if (init_ocfs2_extent_maps())
return -ENOMEM;

status = init_ocfs2_uptodate_cache();
if (status < 0) {
mlog_errno(status);

@@ -837,7 +834,6 @@ static int __init ocfs2_init(void)
if (status < 0) {
ocfs2_free_mem_caches();
exit_ocfs2_uptodate_cache();
exit_ocfs2_extent_maps();
}

mlog_exit(status);

@@ -863,8 +859,6 @@ static void __exit ocfs2_exit(void)

unregister_filesystem(&ocfs2_fs_type);

exit_ocfs2_extent_maps();

exit_ocfs2_uptodate_cache();

mlog_exit_void();

@@ -963,6 +957,7 @@ static void ocfs2_inode_init_once(void *data,
ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
ocfs2_lock_res_init_once(&oi->ip_data_lockres);
ocfs2_lock_res_init_once(&oi->ip_open_lockres);

ocfs2_metadata_cache_init(&oi->vfs_inode);
289 fs/ocfs2/vote.c

@@ -63,17 +63,10 @@ struct ocfs2_msg_hdr
__be32 h_node_num; /* node sending this particular message. */
};

/* OCFS2_MAX_FILENAME_LEN is 255 characters, but we want to align this
* for the network. */
#define OCFS2_VOTE_FILENAME_LEN 256
struct ocfs2_vote_msg
{
struct ocfs2_msg_hdr v_hdr;
union {
__be32 v_generic1;
__be32 v_orphaned_slot; /* Used during delete votes */
__be32 v_nlink; /* Used during unlink votes */
} md1; /* Message type dependant 1 */
__be32 v_reserved1;
};

/* Responses are given these values to maintain backwards

@@ -86,7 +79,6 @@ struct ocfs2_response_msg
{
struct ocfs2_msg_hdr r_hdr;
__be32 r_response;
__be32 r_orphaned_slot;
};

struct ocfs2_vote_work {

@@ -96,7 +88,6 @@ struct ocfs2_vote_work {

enum ocfs2_vote_request {
OCFS2_VOTE_REQ_INVALID = 0,
OCFS2_VOTE_REQ_DELETE,
OCFS2_VOTE_REQ_MOUNT,
OCFS2_VOTE_REQ_UMOUNT,
OCFS2_VOTE_REQ_LAST

@@ -151,135 +142,23 @@ static void ocfs2_process_umount_request(struct ocfs2_super *osb,
ocfs2_node_map_set_bit(osb, &osb->umount_map, node_num);
}

void ocfs2_mark_inode_remotely_deleted(struct inode *inode)
{
struct ocfs2_inode_info *oi = OCFS2_I(inode);

assert_spin_locked(&oi->ip_lock);
/* We set the SKIP_DELETE flag on the inode so we don't try to
* delete it in delete_inode ourselves, thus avoiding
* unecessary lock pinging. If the other node failed to wipe
* the inode as a result of a crash, then recovery will pick
* up the slack. */
oi->ip_flags |= OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE;
}

static int ocfs2_process_delete_request(struct inode *inode,
int *orphaned_slot)
{
int response = OCFS2_RESPONSE_BUSY;

mlog(0, "DELETE vote on inode %lu, read lnk_cnt = %u, slot = %d\n",
inode->i_ino, inode->i_nlink, *orphaned_slot);

spin_lock(&OCFS2_I(inode)->ip_lock);

/* Whatever our vote response is, we want to make sure that
* the orphaned slot is recorded properly on this node *and*
* on the requesting node. Technically, if the requesting node
* did not know which slot the inode is orphaned in but we
* respond with BUSY he doesn't actually need the orphaned
* slot, but it doesn't hurt to do it here anyway. */
if ((*orphaned_slot) != OCFS2_INVALID_SLOT) {
mlog_bug_on_msg(OCFS2_I(inode)->ip_orphaned_slot !=
OCFS2_INVALID_SLOT &&
OCFS2_I(inode)->ip_orphaned_slot !=
(*orphaned_slot),
"Inode %llu: This node thinks it's "
"orphaned in slot %d, messaged it's in %d\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
OCFS2_I(inode)->ip_orphaned_slot,
*orphaned_slot);

mlog(0, "Setting orphaned slot for inode %llu to %d\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
*orphaned_slot);

OCFS2_I(inode)->ip_orphaned_slot = *orphaned_slot;
} else {
mlog(0, "Sending back orphaned slot %d for inode %llu\n",
OCFS2_I(inode)->ip_orphaned_slot,
(unsigned long long)OCFS2_I(inode)->ip_blkno);

*orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
}

/* vote no if the file is still open. */
if (OCFS2_I(inode)->ip_open_count) {
mlog(0, "open count = %u\n",
OCFS2_I(inode)->ip_open_count);
spin_unlock(&OCFS2_I(inode)->ip_lock);
goto done;
}
spin_unlock(&OCFS2_I(inode)->ip_lock);

/* directories are a bit ugly... What if someone is sitting in
* it? We want to make sure the inode is removed completely as
* a result of the iput in process_vote. */
if (S_ISDIR(inode->i_mode) && (atomic_read(&inode->i_count) != 1)) {
mlog(0, "i_count = %u\n", atomic_read(&inode->i_count));
goto done;
}

if (filemap_fdatawrite(inode->i_mapping)) {
mlog(ML_ERROR, "Could not sync inode %llu for delete!\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
goto done;
}
sync_mapping_buffers(inode->i_mapping);
truncate_inode_pages(inode->i_mapping, 0);
ocfs2_extent_map_trunc(inode, 0);

spin_lock(&OCFS2_I(inode)->ip_lock);
/* double check open count - someone might have raced this
* thread into ocfs2_file_open while we were writing out
* data. If we're to allow a wipe of this inode now, we *must*
* hold the spinlock until we've marked it. */
if (OCFS2_I(inode)->ip_open_count) {
mlog(0, "Raced to wipe! open count = %u\n",
OCFS2_I(inode)->ip_open_count);
spin_unlock(&OCFS2_I(inode)->ip_lock);
goto done;
}

/* Mark the inode as being wiped from disk. */
ocfs2_mark_inode_remotely_deleted(inode);
spin_unlock(&OCFS2_I(inode)->ip_lock);

/* Not sure this is necessary anymore. */
d_prune_aliases(inode);

/* If we get here, then we're voting 'yes', so commit the
* delete on our side. */
response = OCFS2_RESPONSE_OK;
done:
return response;
}

static void ocfs2_process_vote(struct ocfs2_super *osb,
struct ocfs2_vote_msg *msg)
{
int net_status, vote_response;
int orphaned_slot = 0;
unsigned int node_num, generation;
unsigned int node_num;
u64 blkno;
enum ocfs2_vote_request request;
struct inode *inode = NULL;
struct ocfs2_msg_hdr *hdr = &msg->v_hdr;
struct ocfs2_response_msg response;

/* decode the network mumbo jumbo into local variables. */
request = be32_to_cpu(hdr->h_request);
blkno = be64_to_cpu(hdr->h_blkno);
generation = be32_to_cpu(hdr->h_generation);
node_num = be32_to_cpu(hdr->h_node_num);
if (request == OCFS2_VOTE_REQ_DELETE)
orphaned_slot = be32_to_cpu(msg->md1.v_orphaned_slot);

mlog(0, "processing vote: request = %u, blkno = %llu, "
"generation = %u, node_num = %u, priv1 = %u\n", request,
(unsigned long long)blkno, generation, node_num,
be32_to_cpu(msg->md1.v_generic1));
mlog(0, "processing vote: request = %u, blkno = %llu, node_num = %u\n",
request, (unsigned long long)blkno, node_num);

if (!ocfs2_is_valid_vote_request(request)) {
mlog(ML_ERROR, "Invalid vote request %d from node %u\n",

@@ -302,52 +181,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
break;
}

/* We cannot process the remaining message types before we're
* fully mounted. It's perfectly safe however to send a 'yes'
* response as we can't possibly have any of the state they're
* asking us to modify yet. */
if (atomic_read(&osb->vol_state) == VOLUME_INIT)
goto respond;

/* If we get here, then the request is against an inode. */
inode = ocfs2_ilookup_for_vote(osb, blkno,
request == OCFS2_VOTE_REQ_DELETE);

/* Not finding the inode is perfectly valid - it means we're
* not interested in what the other node is about to do to it
* so in those cases we automatically respond with an
* affirmative. Cluster locking ensures that we won't race
* interest in the inode with this vote request. */
if (!inode)
goto respond;

/* Check generation values. It's possible for us to get a
* request against a stale inode. If so then we proceed as if
* we had not found an inode in the first place. */
if (inode->i_generation != generation) {
mlog(0, "generation passed %u != inode generation = %u, "
"ip_flags = %x, ip_blkno = %llu, msg %llu, i_count = %u, "
"message type = %u\n", generation, inode->i_generation,
OCFS2_I(inode)->ip_flags,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)blkno, atomic_read(&inode->i_count),
request);
iput(inode);
inode = NULL;
goto respond;
}

switch (request) {
case OCFS2_VOTE_REQ_DELETE:
vote_response = ocfs2_process_delete_request(inode,
&orphaned_slot);
break;
default:
mlog(ML_ERROR, "node %u, invalid request: %u\n",
node_num, request);
vote_response = OCFS2_RESPONSE_BAD_MSG;
}

respond:
/* Response struture is small so we just put it on the stack
* and stuff it inline. */

@@ -357,7 +190,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
response.r_hdr.h_generation = hdr->h_generation;
response.r_hdr.h_node_num = cpu_to_be32(osb->node_num);
response.r_response = cpu_to_be32(vote_response);
response.r_orphaned_slot = cpu_to_be32(orphaned_slot);

net_status = o2net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE,
osb->net_key,

@@ -373,9 +205,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
&& net_status != -ENOTCONN)
mlog(ML_ERROR, "message to node %u fails with error %d!\n",
node_num, net_status);

if (inode)
iput(inode);
}

static void ocfs2_vote_thread_do_work(struct ocfs2_super *osb)

@@ -634,8 +463,7 @@ static int ocfs2_broadcast_vote(struct ocfs2_super *osb,
static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb,
u64 blkno,
unsigned int generation,
enum ocfs2_vote_request type,
u32 priv)
enum ocfs2_vote_request type)
{
struct ocfs2_vote_msg *request;
struct ocfs2_msg_hdr *hdr;

@@ -651,8 +479,6 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb,
hdr->h_request = cpu_to_be32(type);
hdr->h_blkno = cpu_to_be64(blkno);
hdr->h_generation = cpu_to_be32(generation);

request->md1.v_generic1 = cpu_to_be32(priv);
}

return request;

@@ -664,7 +490,7 @@ static int ocfs2_do_request_vote(struct ocfs2_super *osb,
struct ocfs2_vote_msg *request,
struct ocfs2_net_response_cb *callback)
{
int status, response;
int status, response = -EBUSY;
unsigned int response_id;
struct ocfs2_msg_hdr *hdr;

@@ -686,109 +512,12 @@ static int ocfs2_do_request_vote(struct ocfs2_super *osb,
return status;
}

static int ocfs2_request_vote(struct inode *inode,
struct ocfs2_vote_msg *request,
struct ocfs2_net_response_cb *callback)
{
int status;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

if (ocfs2_inode_is_new(inode))
return 0;

status = -EAGAIN;
while (status == -EAGAIN) {
if (!(osb->s_mount_opt & OCFS2_MOUNT_NOINTR) &&
signal_pending(current))
return -ERESTARTSYS;

status = ocfs2_super_lock(osb, 0);
if (status < 0) {
mlog_errno(status);
break;
}

status = 0;
if (!ocfs2_node_map_is_only(osb, &osb->mounted_map,
osb->node_num))
status = ocfs2_do_request_vote(osb, request, callback);

ocfs2_super_unlock(osb, 0);
}
return status;
}

static void ocfs2_delete_response_cb(void *priv,
struct ocfs2_response_msg *resp)
{
int orphaned_slot, node;
struct inode *inode = priv;

orphaned_slot = be32_to_cpu(resp->r_orphaned_slot);
node = be32_to_cpu(resp->r_hdr.h_node_num);
mlog(0, "node %d tells us that inode %llu is orphaned in slot %d\n",
node, (unsigned long long)OCFS2_I(inode)->ip_blkno,
orphaned_slot);

/* The other node may not actually know which slot the inode
* is orphaned in. */
if (orphaned_slot == OCFS2_INVALID_SLOT)
return;

/* Ok, the responding node knows which slot this inode is
* orphaned in. We verify that the information is correct and
* then record this in the inode. ocfs2_delete_inode will use
* this information to determine which lock to take. */
spin_lock(&OCFS2_I(inode)->ip_lock);
mlog_bug_on_msg(OCFS2_I(inode)->ip_orphaned_slot != orphaned_slot &&
OCFS2_I(inode)->ip_orphaned_slot
!= OCFS2_INVALID_SLOT, "Inode %llu: Node %d says it's "
"orphaned in slot %d, we think it's in %d\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
be32_to_cpu(resp->r_hdr.h_node_num),
orphaned_slot, OCFS2_I(inode)->ip_orphaned_slot);

OCFS2_I(inode)->ip_orphaned_slot = orphaned_slot;
spin_unlock(&OCFS2_I(inode)->ip_lock);
}

int ocfs2_request_delete_vote(struct inode *inode)
{
int orphaned_slot, status;
struct ocfs2_net_response_cb delete_cb;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_vote_msg *request;

spin_lock(&OCFS2_I(inode)->ip_lock);
orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
spin_unlock(&OCFS2_I(inode)->ip_lock);

delete_cb.rc_cb = ocfs2_delete_response_cb;
delete_cb.rc_priv = inode;

mlog(0, "Inode %llu, we start thinking orphaned slot is %d\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno, orphaned_slot);

status = -ENOMEM;
request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
inode->i_generation,
OCFS2_VOTE_REQ_DELETE, orphaned_slot);
if (request) {
status = ocfs2_request_vote(inode, request, &delete_cb);

kfree(request);
}

return status;
}

int ocfs2_request_mount_vote(struct ocfs2_super *osb)
{
int status;
struct ocfs2_vote_msg *request = NULL;

request = ocfs2_new_vote_request(osb, 0ULL, 0,
OCFS2_VOTE_REQ_MOUNT, 0);
request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_MOUNT);
if (!request) {
status = -ENOMEM;
goto bail;

@@ -821,8 +550,7 @@ int ocfs2_request_umount_vote(struct ocfs2_super *osb)
int status;
struct ocfs2_vote_msg *request = NULL;

request = ocfs2_new_vote_request(osb, 0ULL, 0,
OCFS2_VOTE_REQ_UMOUNT, 0);
request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_UMOUNT);
if (!request) {
status = -ENOMEM;
goto bail;

@@ -969,7 +697,6 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg,
be32_to_cpu(work->w_msg.v_hdr.h_generation));
mlog(0, "h_node_num = %u\n",
be32_to_cpu(work->w_msg.v_hdr.h_node_num));
mlog(0, "v_generic1 = %u\n", be32_to_cpu(work->w_msg.md1.v_generic1));

spin_lock(&osb->vote_task_lock);
list_add_tail(&work->w_list, &osb->vote_list);
@@ -38,14 +38,11 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
wake_up(&osb->vote_event);
}

int ocfs2_request_delete_vote(struct inode *inode);
int ocfs2_request_mount_vote(struct ocfs2_super *osb);
int ocfs2_request_umount_vote(struct ocfs2_super *osb);
int ocfs2_register_net_handlers(struct ocfs2_super *osb);
void ocfs2_unregister_net_handlers(struct ocfs2_super *osb);

void ocfs2_mark_inode_remotely_deleted(struct inode *inode);

void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb,
int node_num);
#endif
@@ -239,13 +239,11 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
/*
* `endbyte' is inclusive
*/
int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
unsigned int flags)
int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
loff_t endbyte, unsigned int flags)
{
int ret;
struct address_space *mapping;

mapping = file->f_mapping;
if (!mapping) {
ret = -EINVAL;
goto out;

@@ -275,4 +273,4 @@ int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
out:
return ret;
}
EXPORT_SYMBOL_GPL(do_sync_file_range);
EXPORT_SYMBOL_GPL(do_sync_mapping_range);
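Retargeting the range-sync entry point at an address_space lets callers that hold only an inode, not a struct file, write back a byte range directly. A minimal, hypothetical in-kernel sketch of such a caller (example_flush_range is an assumption for illustration, not code from this merge; the SYNC_FILE_RANGE_* flags are the existing ones used by sys_sync_file_range, and endbyte is inclusive):

#include <linux/fs.h>

/* Write back and wait on one byte range of an inode's page cache. */
static int example_flush_range(struct inode *inode, loff_t start, loff_t end)
{
	return do_sync_mapping_range(inode->i_mapping, start, end,
				     SYNC_FILE_RANGE_WAIT_BEFORE |
				     SYNC_FILE_RANGE_WRITE |
				     SYNC_FILE_RANGE_WAIT_AFTER);
}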
@@ -843,8 +843,13 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
extern int fcntl_getlease(struct file *filp);

/* fs/sync.c */
extern int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
unsigned int flags);
extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
loff_t endbyte, unsigned int flags);
static inline int do_sync_file_range(struct file *file, loff_t offset,
loff_t endbyte, unsigned int flags)
{
return do_sync_mapping_range(file->f_mapping, offset, endbyte, flags);
}

/* fs/locks.c */
extern void locks_init_lock(struct file_lock *);