mirror of https://gitee.com/openkylin/linux.git
rds: add type of service (tos) infrastructure
The RDS type of service (TOS) is user-defined and needs to be configured via the RDS IOCTL interface. It must be set before initiating any traffic, and once set, the TOS cannot be changed. All outgoing traffic from the socket will be associated with its TOS. Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> [yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes] Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
This commit is contained in:
parent
d021fabf52
commit
3eb450367d
|
@ -69,6 +69,12 @@
|
||||||
#define RDS_TRANS_COUNT 3
|
#define RDS_TRANS_COUNT 3
|
||||||
#define RDS_TRANS_NONE (~0)
|
#define RDS_TRANS_NONE (~0)
|
||||||
|
|
||||||
|
/* IOCTLS commands for SOL_RDS */
|
||||||
|
#define SIOCRDSSETTOS (SIOCPROTOPRIVATE)
|
||||||
|
#define SIOCRDSGETTOS (SIOCPROTOPRIVATE + 1)
|
||||||
|
|
||||||
|
typedef __u8 rds_tos_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Control message types for SOL_RDS.
|
* Control message types for SOL_RDS.
|
||||||
*
|
*
|
||||||
|
@ -149,6 +155,7 @@ struct rds_info_connection {
|
||||||
__be32 faddr;
|
__be32 faddr;
|
||||||
__u8 transport[TRANSNAMSIZ]; /* null term ascii */
|
__u8 transport[TRANSNAMSIZ]; /* null term ascii */
|
||||||
__u8 flags;
|
__u8 flags;
|
||||||
|
__u8 tos;
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
|
||||||
struct rds6_info_connection {
|
struct rds6_info_connection {
|
||||||
|
@ -171,6 +178,7 @@ struct rds_info_message {
|
||||||
__be16 lport;
|
__be16 lport;
|
||||||
__be16 fport;
|
__be16 fport;
|
||||||
__u8 flags;
|
__u8 flags;
|
||||||
|
__u8 tos;
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
|
||||||
struct rds6_info_message {
|
struct rds6_info_message {
|
||||||
|
@ -214,6 +222,7 @@ struct rds_info_tcp_socket {
|
||||||
__u32 last_sent_nxt;
|
__u32 last_sent_nxt;
|
||||||
__u32 last_expected_una;
|
__u32 last_expected_una;
|
||||||
__u32 last_seen_una;
|
__u32 last_seen_una;
|
||||||
|
__u8 tos;
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
|
||||||
struct rds6_info_tcp_socket {
|
struct rds6_info_tcp_socket {
|
||||||
|
@ -240,6 +249,7 @@ struct rds_info_rdma_connection {
|
||||||
__u32 max_send_sge;
|
__u32 max_send_sge;
|
||||||
__u32 rdma_mr_max;
|
__u32 rdma_mr_max;
|
||||||
__u32 rdma_mr_size;
|
__u32 rdma_mr_size;
|
||||||
|
__u8 tos;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct rds6_info_rdma_connection {
|
struct rds6_info_rdma_connection {
|
||||||
|
@ -253,6 +263,7 @@ struct rds6_info_rdma_connection {
|
||||||
__u32 max_send_sge;
|
__u32 max_send_sge;
|
||||||
__u32 rdma_mr_max;
|
__u32 rdma_mr_max;
|
||||||
__u32 rdma_mr_size;
|
__u32 rdma_mr_size;
|
||||||
|
__u8 tos;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* RDS message Receive Path Latency points */
|
/* RDS message Receive Path Latency points */
|
||||||
|
|
|
@ -254,7 +254,38 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
|
||||||
|
|
||||||
static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
|
static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
|
||||||
{
|
{
|
||||||
return -ENOIOCTLCMD;
|
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
|
||||||
|
rds_tos_t tos;
|
||||||
|
|
||||||
|
switch (cmd) {
|
||||||
|
case SIOCRDSSETTOS:
|
||||||
|
if (get_user(tos, (rds_tos_t __user *)arg))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
if (rs->rs_transport &&
|
||||||
|
rs->rs_transport->t_type == RDS_TRANS_TCP)
|
||||||
|
tos = 0;
|
||||||
|
|
||||||
|
spin_lock_bh(&rds_sock_lock);
|
||||||
|
if (rs->rs_tos || rs->rs_conn) {
|
||||||
|
spin_unlock_bh(&rds_sock_lock);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
rs->rs_tos = tos;
|
||||||
|
spin_unlock_bh(&rds_sock_lock);
|
||||||
|
break;
|
||||||
|
case SIOCRDSGETTOS:
|
||||||
|
spin_lock_bh(&rds_sock_lock);
|
||||||
|
tos = rs->rs_tos;
|
||||||
|
spin_unlock_bh(&rds_sock_lock);
|
||||||
|
if (put_user(tos, (rds_tos_t __user *)arg))
|
||||||
|
return -EFAULT;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -ENOIOCTLCMD;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
|
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
|
||||||
|
@ -650,6 +681,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
|
||||||
spin_lock_init(&rs->rs_rdma_lock);
|
spin_lock_init(&rs->rs_rdma_lock);
|
||||||
rs->rs_rdma_keys = RB_ROOT;
|
rs->rs_rdma_keys = RB_ROOT;
|
||||||
rs->rs_rx_traces = 0;
|
rs->rs_rx_traces = 0;
|
||||||
|
rs->rs_tos = 0;
|
||||||
|
rs->rs_conn = NULL;
|
||||||
|
|
||||||
spin_lock_bh(&rds_sock_lock);
|
spin_lock_bh(&rds_sock_lock);
|
||||||
list_add_tail(&rs->rs_item, &rds_sock_list);
|
list_add_tail(&rs->rs_item, &rds_sock_list);
|
||||||
|
|
|
@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
|
||||||
const struct in6_addr *laddr,
|
const struct in6_addr *laddr,
|
||||||
const struct in6_addr *faddr,
|
const struct in6_addr *faddr,
|
||||||
struct rds_transport *trans,
|
struct rds_transport *trans,
|
||||||
int dev_if)
|
u8 tos, int dev_if)
|
||||||
{
|
{
|
||||||
struct rds_connection *conn, *ret = NULL;
|
struct rds_connection *conn, *ret = NULL;
|
||||||
|
|
||||||
|
@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
|
||||||
if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
|
if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
|
||||||
ipv6_addr_equal(&conn->c_laddr, laddr) &&
|
ipv6_addr_equal(&conn->c_laddr, laddr) &&
|
||||||
conn->c_trans == trans &&
|
conn->c_trans == trans &&
|
||||||
|
conn->c_tos == tos &&
|
||||||
net == rds_conn_net(conn) &&
|
net == rds_conn_net(conn) &&
|
||||||
conn->c_dev_if == dev_if) {
|
conn->c_dev_if == dev_if) {
|
||||||
ret = conn;
|
ret = conn;
|
||||||
|
@ -160,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
const struct in6_addr *laddr,
|
const struct in6_addr *laddr,
|
||||||
const struct in6_addr *faddr,
|
const struct in6_addr *faddr,
|
||||||
struct rds_transport *trans,
|
struct rds_transport *trans,
|
||||||
gfp_t gfp,
|
gfp_t gfp, u8 tos,
|
||||||
int is_outgoing,
|
int is_outgoing,
|
||||||
int dev_if)
|
int dev_if)
|
||||||
{
|
{
|
||||||
|
@ -172,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
|
int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
|
conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
|
||||||
if (conn &&
|
if (conn &&
|
||||||
conn->c_loopback &&
|
conn->c_loopback &&
|
||||||
conn->c_trans != &rds_loop_transport &&
|
conn->c_trans != &rds_loop_transport &&
|
||||||
|
@ -206,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
|
conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
|
||||||
conn->c_faddr = *faddr;
|
conn->c_faddr = *faddr;
|
||||||
conn->c_dev_if = dev_if;
|
conn->c_dev_if = dev_if;
|
||||||
|
conn->c_tos = tos;
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_IPV6)
|
#if IS_ENABLED(CONFIG_IPV6)
|
||||||
/* If the local address is link local, set c_bound_if to be the
|
/* If the local address is link local, set c_bound_if to be the
|
||||||
|
@ -298,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
struct rds_connection *found;
|
struct rds_connection *found;
|
||||||
|
|
||||||
found = rds_conn_lookup(net, head, laddr, faddr, trans,
|
found = rds_conn_lookup(net, head, laddr, faddr, trans,
|
||||||
dev_if);
|
tos, dev_if);
|
||||||
if (found) {
|
if (found) {
|
||||||
struct rds_conn_path *cp;
|
struct rds_conn_path *cp;
|
||||||
int i;
|
int i;
|
||||||
|
@ -333,10 +335,10 @@ static struct rds_connection *__rds_conn_create(struct net *net,
|
||||||
struct rds_connection *rds_conn_create(struct net *net,
|
struct rds_connection *rds_conn_create(struct net *net,
|
||||||
const struct in6_addr *laddr,
|
const struct in6_addr *laddr,
|
||||||
const struct in6_addr *faddr,
|
const struct in6_addr *faddr,
|
||||||
struct rds_transport *trans, gfp_t gfp,
|
struct rds_transport *trans, u8 tos,
|
||||||
int dev_if)
|
gfp_t gfp, int dev_if)
|
||||||
{
|
{
|
||||||
return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
|
return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(rds_conn_create);
|
EXPORT_SYMBOL_GPL(rds_conn_create);
|
||||||
|
|
||||||
|
@ -344,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
|
||||||
const struct in6_addr *laddr,
|
const struct in6_addr *laddr,
|
||||||
const struct in6_addr *faddr,
|
const struct in6_addr *faddr,
|
||||||
struct rds_transport *trans,
|
struct rds_transport *trans,
|
||||||
gfp_t gfp, int dev_if)
|
u8 tos, gfp_t gfp, int dev_if)
|
||||||
{
|
{
|
||||||
return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
|
return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
|
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
|
||||||
|
|
||||||
|
|
|
@ -301,6 +301,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
|
||||||
|
|
||||||
iinfo->src_addr = conn->c_laddr.s6_addr32[3];
|
iinfo->src_addr = conn->c_laddr.s6_addr32[3];
|
||||||
iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
|
iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
|
||||||
|
iinfo->tos = conn->c_tos;
|
||||||
|
|
||||||
memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
|
memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
|
||||||
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
|
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
|
||||||
|
|
|
@ -786,7 +786,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
|
||||||
|
|
||||||
/* RDS/IB is not currently netns aware, thus init_net */
|
/* RDS/IB is not currently netns aware, thus init_net */
|
||||||
conn = rds_conn_create(&init_net, daddr6, saddr6,
|
conn = rds_conn_create(&init_net, daddr6, saddr6,
|
||||||
&rds_ib_transport, GFP_KERNEL, ifindex);
|
&rds_ib_transport, 0, GFP_KERNEL, ifindex);
|
||||||
if (IS_ERR(conn)) {
|
if (IS_ERR(conn)) {
|
||||||
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
|
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
|
||||||
conn = NULL;
|
conn = NULL;
|
||||||
|
|
|
@ -115,6 +115,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
|
||||||
pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
|
pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
|
||||||
&conn->c_laddr, &conn->c_faddr);
|
&conn->c_laddr, &conn->c_faddr);
|
||||||
conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
|
conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
|
||||||
|
conn->c_tos = 0;
|
||||||
rds_conn_drop(conn);
|
rds_conn_drop(conn);
|
||||||
}
|
}
|
||||||
rdsdebug("Connection rejected: %s\n",
|
rdsdebug("Connection rejected: %s\n",
|
||||||
|
|
|
@ -158,6 +158,9 @@ struct rds_connection {
|
||||||
unsigned int c_version;
|
unsigned int c_version;
|
||||||
possible_net_t c_net;
|
possible_net_t c_net;
|
||||||
|
|
||||||
|
/* TOS */
|
||||||
|
u8 c_tos;
|
||||||
|
|
||||||
struct list_head c_map_item;
|
struct list_head c_map_item;
|
||||||
unsigned long c_map_queued;
|
unsigned long c_map_queued;
|
||||||
|
|
||||||
|
@ -652,6 +655,7 @@ struct rds_sock {
|
||||||
u8 rs_rx_traces;
|
u8 rs_rx_traces;
|
||||||
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
|
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
|
||||||
struct rds_msg_zcopy_queue rs_zcookie_queue;
|
struct rds_msg_zcopy_queue rs_zcookie_queue;
|
||||||
|
u8 rs_tos;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
|
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
|
||||||
|
@ -760,13 +764,14 @@ void rds_conn_exit(void);
|
||||||
struct rds_connection *rds_conn_create(struct net *net,
|
struct rds_connection *rds_conn_create(struct net *net,
|
||||||
const struct in6_addr *laddr,
|
const struct in6_addr *laddr,
|
||||||
const struct in6_addr *faddr,
|
const struct in6_addr *faddr,
|
||||||
struct rds_transport *trans, gfp_t gfp,
|
struct rds_transport *trans,
|
||||||
|
u8 tos, gfp_t gfp,
|
||||||
int dev_if);
|
int dev_if);
|
||||||
struct rds_connection *rds_conn_create_outgoing(struct net *net,
|
struct rds_connection *rds_conn_create_outgoing(struct net *net,
|
||||||
const struct in6_addr *laddr,
|
const struct in6_addr *laddr,
|
||||||
const struct in6_addr *faddr,
|
const struct in6_addr *faddr,
|
||||||
struct rds_transport *trans,
|
struct rds_transport *trans,
|
||||||
gfp_t gfp, int dev_if);
|
u8 tos, gfp_t gfp, int dev_if);
|
||||||
void rds_conn_shutdown(struct rds_conn_path *cpath);
|
void rds_conn_shutdown(struct rds_conn_path *cpath);
|
||||||
void rds_conn_destroy(struct rds_connection *conn);
|
void rds_conn_destroy(struct rds_connection *conn);
|
||||||
void rds_conn_drop(struct rds_connection *conn);
|
void rds_conn_drop(struct rds_connection *conn);
|
||||||
|
|
|
@ -782,6 +782,7 @@ void rds_inc_info_copy(struct rds_incoming *inc,
|
||||||
|
|
||||||
minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
|
minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
|
||||||
minfo.len = be32_to_cpu(inc->i_hdr.h_len);
|
minfo.len = be32_to_cpu(inc->i_hdr.h_len);
|
||||||
|
minfo.tos = inc->i_conn->c_tos;
|
||||||
|
|
||||||
if (flip) {
|
if (flip) {
|
||||||
minfo.laddr = daddr;
|
minfo.laddr = daddr;
|
||||||
|
|
|
@ -1277,12 +1277,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
||||||
|
|
||||||
/* rds_conn_create has a spinlock that runs with IRQ off.
|
/* rds_conn_create has a spinlock that runs with IRQ off.
|
||||||
* Caching the conn in the socket helps a lot. */
|
* Caching the conn in the socket helps a lot. */
|
||||||
if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
|
if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) {
|
||||||
conn = rs->rs_conn;
|
conn = rs->rs_conn;
|
||||||
else {
|
} else {
|
||||||
conn = rds_conn_create_outgoing(sock_net(sock->sk),
|
conn = rds_conn_create_outgoing(sock_net(sock->sk),
|
||||||
&rs->rs_bound_addr, &daddr,
|
&rs->rs_bound_addr, &daddr,
|
||||||
rs->rs_transport,
|
rs->rs_transport, 0,
|
||||||
sock->sk->sk_allocation,
|
sock->sk->sk_allocation,
|
||||||
scope_id);
|
scope_id);
|
||||||
if (IS_ERR(conn)) {
|
if (IS_ERR(conn)) {
|
||||||
|
|
|
@ -267,6 +267,7 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
|
||||||
tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
|
tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
|
||||||
tsinfo.last_expected_una = tc->t_last_expected_una;
|
tsinfo.last_expected_una = tc->t_last_expected_una;
|
||||||
tsinfo.last_seen_una = tc->t_last_seen_una;
|
tsinfo.last_seen_una = tc->t_last_seen_una;
|
||||||
|
tsinfo.tos = tc->t_cpath->cp_conn->c_tos;
|
||||||
|
|
||||||
rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
|
rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
|
||||||
}
|
}
|
||||||
|
|
|
@ -200,7 +200,7 @@ int rds_tcp_accept_one(struct socket *sock)
|
||||||
|
|
||||||
conn = rds_conn_create(sock_net(sock->sk),
|
conn = rds_conn_create(sock_net(sock->sk),
|
||||||
my_addr, peer_addr,
|
my_addr, peer_addr,
|
||||||
&rds_tcp_transport, GFP_KERNEL, dev_if);
|
&rds_tcp_transport, 0, GFP_KERNEL, dev_if);
|
||||||
|
|
||||||
if (IS_ERR(conn)) {
|
if (IS_ERR(conn)) {
|
||||||
ret = PTR_ERR(conn);
|
ret = PTR_ERR(conn);
|
||||||
|
|
Loading…
Reference in New Issue