libceph: a major OSD client update
This is a major sync up, up to ~Jewel. The highlights are:

- per-session request trees (vs a global per-client tree)
- per-session locking (vs a global per-client rwlock)
- homeless OSD session
- no ad-hoc global per-client lists
- support for pool quotas
- foundation for watch/notify v2 support
- foundation for map check (pool deletion detection) support

The switchover is incomplete: lingering requests can be set up and torn down but are never reestablished. This functionality is restored with the introduction of the new lingering infrastructure (ceph_osd_linger_request, linger_work, etc.) in a later commit.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
9dd2845ccb
commit
5aea3dcd50
|
@ -193,12 +193,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
|
||||||
if (copy_from_user(&dl, arg, sizeof(dl)))
|
if (copy_from_user(&dl, arg, sizeof(dl)))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
down_read(&osdc->map_sem);
|
down_read(&osdc->lock);
|
||||||
r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
|
r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
|
||||||
&dl.object_no, &dl.object_offset,
|
&dl.object_no, &dl.object_offset,
|
||||||
&olen);
|
&olen);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
up_read(&osdc->map_sem);
|
up_read(&osdc->lock);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
dl.file_offset -= dl.object_offset;
|
dl.file_offset -= dl.object_offset;
|
||||||
|
@ -217,7 +217,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
|
||||||
|
|
||||||
r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid);
|
r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
up_read(&osdc->map_sem);
|
up_read(&osdc->lock);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,7 +230,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
|
||||||
} else {
|
} else {
|
||||||
memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
|
memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
|
||||||
}
|
}
|
||||||
up_read(&osdc->map_sem);
|
up_read(&osdc->lock);
|
||||||
|
|
||||||
/* send result back to user */
|
/* send result back to user */
|
||||||
if (copy_to_user(arg, &dl, sizeof(dl)))
|
if (copy_to_user(arg, &dl, sizeof(dl)))
|
||||||
|
|
|
@ -75,7 +75,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
|
||||||
char buf[128];
|
char buf[128];
|
||||||
|
|
||||||
dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
|
dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
|
||||||
down_read(&osdc->map_sem);
|
down_read(&osdc->lock);
|
||||||
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
|
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
|
||||||
if (pool_name) {
|
if (pool_name) {
|
||||||
size_t len = strlen(pool_name);
|
size_t len = strlen(pool_name);
|
||||||
|
@ -107,7 +107,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
|
||||||
ret = -ERANGE;
|
ret = -ERANGE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
up_read(&osdc->map_sem);
|
up_read(&osdc->lock);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,13 +141,13 @@ static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
|
||||||
s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
|
s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
|
||||||
const char *pool_name;
|
const char *pool_name;
|
||||||
|
|
||||||
down_read(&osdc->map_sem);
|
down_read(&osdc->lock);
|
||||||
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
|
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
|
||||||
if (pool_name)
|
if (pool_name)
|
||||||
ret = snprintf(val, size, "%s", pool_name);
|
ret = snprintf(val, size, "%s", pool_name);
|
||||||
else
|
else
|
||||||
ret = snprintf(val, size, "%lld", (unsigned long long)pool);
|
ret = snprintf(val, size, "%lld", (unsigned long long)pool);
|
||||||
up_read(&osdc->map_sem);
|
up_read(&osdc->lock);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,12 +33,13 @@ struct ceph_osd {
|
||||||
int o_incarnation;
|
int o_incarnation;
|
||||||
struct rb_node o_node;
|
struct rb_node o_node;
|
||||||
struct ceph_connection o_con;
|
struct ceph_connection o_con;
|
||||||
struct list_head o_requests;
|
struct rb_root o_requests;
|
||||||
struct list_head o_linger_requests;
|
struct list_head o_linger_requests;
|
||||||
struct list_head o_osd_lru;
|
struct list_head o_osd_lru;
|
||||||
struct ceph_auth_handshake o_auth;
|
struct ceph_auth_handshake o_auth;
|
||||||
unsigned long lru_ttl;
|
unsigned long lru_ttl;
|
||||||
struct list_head o_keepalive_item;
|
struct list_head o_keepalive_item;
|
||||||
|
struct mutex lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CEPH_OSD_SLAB_OPS 2
|
#define CEPH_OSD_SLAB_OPS 2
|
||||||
|
@ -144,8 +145,6 @@ struct ceph_osd_request_target {
|
||||||
struct ceph_osd_request {
|
struct ceph_osd_request {
|
||||||
u64 r_tid; /* unique for this client */
|
u64 r_tid; /* unique for this client */
|
||||||
struct rb_node r_node;
|
struct rb_node r_node;
|
||||||
struct list_head r_req_lru_item;
|
|
||||||
struct list_head r_osd_item;
|
|
||||||
struct list_head r_linger_item;
|
struct list_head r_linger_item;
|
||||||
struct list_head r_linger_osd_item;
|
struct list_head r_linger_osd_item;
|
||||||
struct ceph_osd *r_osd;
|
struct ceph_osd *r_osd;
|
||||||
|
@ -219,19 +218,16 @@ struct ceph_osd_client {
|
||||||
struct ceph_client *client;
|
struct ceph_client *client;
|
||||||
|
|
||||||
struct ceph_osdmap *osdmap; /* current map */
|
struct ceph_osdmap *osdmap; /* current map */
|
||||||
struct rw_semaphore map_sem;
|
struct rw_semaphore lock;
|
||||||
|
|
||||||
struct mutex request_mutex;
|
|
||||||
struct rb_root osds; /* osds */
|
struct rb_root osds; /* osds */
|
||||||
struct list_head osd_lru; /* idle osds */
|
struct list_head osd_lru; /* idle osds */
|
||||||
spinlock_t osd_lru_lock;
|
spinlock_t osd_lru_lock;
|
||||||
u64 last_tid; /* tid of last request */
|
|
||||||
struct rb_root requests; /* pending requests */
|
|
||||||
struct list_head req_lru; /* in-flight lru */
|
|
||||||
struct list_head req_unsent; /* unsent/need-resend queue */
|
|
||||||
struct list_head req_notarget; /* map to no osd */
|
|
||||||
struct list_head req_linger; /* lingering requests */
|
struct list_head req_linger; /* lingering requests */
|
||||||
int num_requests;
|
struct ceph_osd homeless_osd;
|
||||||
|
atomic64_t last_tid; /* tid of last request */
|
||||||
|
atomic_t num_requests;
|
||||||
|
atomic_t num_homeless;
|
||||||
struct delayed_work timeout_work;
|
struct delayed_work timeout_work;
|
||||||
struct delayed_work osds_timeout_work;
|
struct delayed_work osds_timeout_work;
|
||||||
#ifdef CONFIG_DEBUG_FS
|
#ifdef CONFIG_DEBUG_FS
|
||||||
|
|
|
@ -182,21 +182,39 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
|
||||||
seq_putc(s, '\n');
|
seq_putc(s, '\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void dump_requests(struct seq_file *s, struct ceph_osd *osd)
|
||||||
|
{
|
||||||
|
struct rb_node *n;
|
||||||
|
|
||||||
|
mutex_lock(&osd->lock);
|
||||||
|
for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
|
||||||
|
struct ceph_osd_request *req =
|
||||||
|
rb_entry(n, struct ceph_osd_request, r_node);
|
||||||
|
|
||||||
|
dump_request(s, req);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&osd->lock);
|
||||||
|
}
|
||||||
|
|
||||||
static int osdc_show(struct seq_file *s, void *pp)
|
static int osdc_show(struct seq_file *s, void *pp)
|
||||||
{
|
{
|
||||||
struct ceph_client *client = s->private;
|
struct ceph_client *client = s->private;
|
||||||
struct ceph_osd_client *osdc = &client->osdc;
|
struct ceph_osd_client *osdc = &client->osdc;
|
||||||
struct rb_node *p;
|
struct rb_node *n;
|
||||||
|
|
||||||
mutex_lock(&osdc->request_mutex);
|
down_read(&osdc->lock);
|
||||||
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
|
seq_printf(s, "REQUESTS %d homeless %d\n",
|
||||||
struct ceph_osd_request *req;
|
atomic_read(&osdc->num_requests),
|
||||||
|
atomic_read(&osdc->num_homeless));
|
||||||
|
for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
|
||||||
|
struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
|
||||||
|
|
||||||
req = rb_entry(p, struct ceph_osd_request, r_node);
|
dump_requests(s, osd);
|
||||||
|
|
||||||
dump_request(s, req);
|
|
||||||
}
|
}
|
||||||
mutex_unlock(&osdc->request_mutex);
|
dump_requests(s, &osdc->homeless_osd);
|
||||||
|
|
||||||
|
up_read(&osdc->lock);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue