From fde20105f332614b23a3131d706cd90bdd7db72d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 24 Oct 2007 10:12:09 -0200 Subject: [PATCH 1/5] [DCCP]: Retrieve packet sequence number for error reporting This fixes a problem when analysing erroneous packets in dccp_v{4,6}_err: * dccp_hdr_seq currently takes an skb * however, the transport headers in the skb are shifted, due to the preceding IPv4/v6 header. Fixed for v4 and v6 by changing dccp_hdr_seq to take a struct dccp_hdr as argument. Verified that the correct sequence number is now reported in the error handler. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 10 ++++------ net/dccp/ipv4.c | 4 ++-- net/dccp/ipv6.c | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index f3fc4392e93d..55d28cb97596 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -270,10 +270,9 @@ static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen) return memset(skb_transport_header(skb), 0, headlen); } -static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) +static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh) { - return (struct dccp_hdr_ext *)(skb_transport_header(skb) + - sizeof(struct dccp_hdr)); + return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh)); } static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) @@ -287,13 +286,12 @@ static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) return __dccp_basic_hdr_len(dh); } -static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) +static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) { - const struct dccp_hdr *dh = dccp_hdr(skb); __u64 seq_nr = ntohs(dh->dccph_seq); if (dh->dccph_x != 0) - seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); + seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(dh)->dccph_seq_low); else seq_nr += (u32)dh->dccph_seq2 << 16; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 222549ab274a..3f1578db93e8 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -241,7 +241,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(skb); + seq = dccp_hdr_seq(dh); if (sk->sk_state != DCCP_LISTEN && !between48(seq, dp->dccps_swl, dp->dccps_swh)) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); @@ -795,7 +795,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) dh = dccp_hdr(skb); - DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; dccp_pr_debug("%8.8s " diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index bbadd6681b83..62428ff137dd 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -173,7 +173,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_err_convert(type, code, &err); - seq = DCCP_SKB_CB(skb)->dccpd_seq; + seq = dccp_hdr_seq(dh); /* Might be for an request_sock */ switch (sk->sk_state) { struct request_sock *req, **prev; @@ -787,7 +787,7 @@ static int dccp_v6_rcv(struct sk_buff *skb) dh = dccp_hdr(skb); - DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; if (dccp_packet_without_ack(skb)) From 1238d0873b29f7a2de75d576b4cc706c1c75ffbf Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 24 Oct 2007 10:18:06 -0200 Subject: [PATCH 2/5] [DCCP]: One more exemption from full sequence number checks This fixes the following problem: client connects to peer which has no DCCP enabled or loaded; ICMP error messages ("Protocol Unavailable") can be seen on the wire, but the application hangs. Reason: ICMP packets don't get through to dccp_v4_err. When reporting errors, a sequence number check is made for the DCCP packet that had caused an ICMP error to arrive. Such checks can not be made if the socket is in state LISTEN, RESPOND (which in the implementation is the same as LISTEN), or REQUEST, since update_gsr() has not been called in these states, hence the sequence window is 0..0. This patch fixes the problem by adding the REQUEST state as another exemption to the window check. The error reporting now works as expected on connecting. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f1578db93e8..01a6a808bdb7 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -242,7 +242,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); - if (sk->sk_state != DCCP_LISTEN && + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_swl, dp->dccps_swh)) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; From d8ef2c29a0dcfccb2d90cac990143d1a4668708a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 24 Oct 2007 10:27:48 -0200 Subject: [PATCH 3/5] [DCCP]: Convert Reset code into socket error number This adds support for converting the 11 currently defined Reset codes into system error numbers, which are stored in sk_err for further interpretation. This makes the externally visible API behaviour similar to TCP, since a client connecting to a non-existing port will experience ECONNREFUSED. * Code 0, Unspecified, is interpreted as non-error (0); * Code 1, Closed (normal termination), also maps into 0; * Code 2, Aborted, maps into "Connection reset by peer" (ECONNRESET); * Code 3, No Connection and Code 7, Connection Refused, map into "Connection refused" (ECONNREFUSED); * Code 4, Packet Error, maps into "No message of desired type" (ENOMSG); * Code 5, Option Error, maps into "Illegal byte sequence" (EILSEQ); * Code 6, Mandatory Error, maps into "Operation not supported on transport endpoint" (EOPNOTSUPP); * Code 8, Bad Service Code, maps into "Invalid request code" (EBADRQC); * Code 9, Too Busy, maps into "Too many users" (EUSERS); * Code 10, Bad Init Cookie, maps into "Invalid request descriptor" (EBADR); * Code 11, Aggression Penalty, maps into "Quota exceeded" (EDQUOT) which makes sense in terms of using more than the `fair share' of bandwidth. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 2 ++ net/dccp/input.c | 48 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 55d28cb97596..333c3ea82a5d 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -144,6 +144,8 @@ enum dccp_reset_codes { DCCP_RESET_CODE_TOO_BUSY, DCCP_RESET_CODE_BAD_INIT_COOKIE, DCCP_RESET_CODE_AGGRESSION_PENALTY, + + DCCP_MAX_RESET_CODES /* Leave at the end! */ }; /* DCCP options */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 3560a2a875a0..1ce101062824 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -58,6 +58,42 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) dccp_send_close(sk, 0); } +static u8 dccp_reset_code_convert(const u8 code) +{ + const u8 error_code[] = { + [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ + [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ + [DCCP_RESET_CODE_ABORTED] = ECONNRESET, + + [DCCP_RESET_CODE_NO_CONNECTION] = ECONNREFUSED, + [DCCP_RESET_CODE_CONNECTION_REFUSED] = ECONNREFUSED, + [DCCP_RESET_CODE_TOO_BUSY] = EUSERS, + [DCCP_RESET_CODE_AGGRESSION_PENALTY] = EDQUOT, + + [DCCP_RESET_CODE_PACKET_ERROR] = ENOMSG, + [DCCP_RESET_CODE_BAD_INIT_COOKIE] = EBADR, + [DCCP_RESET_CODE_BAD_SERVICE_CODE] = EBADRQC, + [DCCP_RESET_CODE_OPTION_ERROR] = EILSEQ, + [DCCP_RESET_CODE_MANDATORY_ERROR] = EOPNOTSUPP, + }; + + return code >= DCCP_MAX_RESET_CODES ? 0 : error_code[code]; +} + +static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) +{ + u8 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); + + sk->sk_err = err; + + /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ + dccp_fin(sk, skb); + + if (err && !sock_flag(sk, SOCK_DEAD)) + sk_wake_async(sk, 0, POLL_ERR); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); +} + static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); @@ -191,9 +227,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return - */ - dccp_fin(sk, skb); - dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + */ + dccp_rcv_reset(sk, skb); return 0; case DCCP_PKT_CLOSEREQ: dccp_rcv_closereq(sk, skb); @@ -521,12 +556,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Drop packet and return */ if (dh->dccph_type == DCCP_PKT_RESET) { - /* - * Queue the equivalent of TCP fin so that dccp_recvmsg - * exits the loop - */ - dccp_fin(sk, skb); - dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + dccp_rcv_reset(sk, skb); return 0; /* * Step 7: Check for unexpected packet types From 76fd1e87d9456c8185b8df76ac5e533e0c8b39bb Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 24 Oct 2007 10:46:58 -0200 Subject: [PATCH 4/5] [DCCP]: Unaligned pointer access This fixes `unaligned (read) access' errors of the type Kernel unaligned access at TPC[100f970c] dccp_parse_options+0x4f4/0x7e0 [dccp] Kernel unaligned access at TPC[1011f2e4] ccid3_hc_tx_parse_options+0x1ac/0x380 [dccp_ccid3] Kernel unaligned access at TPC[100f9898] dccp_parse_options+0x680/0x880 [dccp] by using the get_unaligned macro for parsing options. Commiter note: Preserved the sparse __be{16,32} annotations. Signed-off-by: Gerrit Renker Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid3.c | 11 +++++++---- net/dccp/options.c | 33 ++++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 25772c326172..05f263e9160d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -40,6 +40,8 @@ #include "lib/tfrc.h" #include "ccid3.h" +#include + #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static int ccid3_debug; #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) @@ -544,6 +546,7 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; + __be32 opt_val; opt_recv = &hctx->ccid3hctx_options_received; @@ -563,8 +566,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_loss_event_rate = - ntohl(*(__be32 *)value); + opt_val = get_unaligned((__be32 *)value); + opt_recv->ccid3or_loss_event_rate = ntohl(opt_val); ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_event_rate); @@ -585,8 +588,8 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_receive_rate = - ntohl(*(__be32 *)value); + opt_val = get_unaligned((__be32 *)value); + opt_recv->ccid3or_receive_rate = ntohl(opt_val); ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_receive_rate); diff --git a/net/dccp/options.c b/net/dccp/options.c index d361b5533309..d286cffe2c49 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) unsigned char opt, len; unsigned char *value; u32 elapsed_time; + __be32 opt_val; int rc; int mandatory = 0; @@ -145,7 +147,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (len != 4) goto out_invalid_option; - opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value); + opt_val = get_unaligned((__be32 *)value); + opt_recv->dccpor_timestamp = ntohl(opt_val); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; dp->dccps_timestamp_time = ktime_get_real(); @@ -159,7 +162,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (len != 4 && len != 6 && len != 8) goto out_invalid_option; - opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value); + opt_val = get_unaligned((__be32 *)value); + opt_recv->dccpor_timestamp_echo = ntohl(opt_val); dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, " "ackno=%llu", dccp_role(sk), @@ -168,16 +172,20 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); + value += 4; - if (len == 4) { + if (len == 4) { /* no elapsed time included */ dccp_pr_debug_cat("\n"); break; } - if (len == 6) - elapsed_time = ntohs(*(__be16 *)(value + 4)); - else - elapsed_time = ntohl(*(__be32 *)(value + 4)); + if (len == 6) { /* 2-byte elapsed time */ + __be16 opt_val2 = get_unaligned((__be16 *)value); + elapsed_time = ntohs(opt_val2); + } else { /* 4-byte elapsed time */ + opt_val = get_unaligned((__be32 *)value); + elapsed_time = ntohl(opt_val); + } dccp_pr_debug_cat(", ELAPSED_TIME=%u\n", elapsed_time); @@ -192,10 +200,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) if (pkt_type == DCCP_PKT_DATA) continue; - if (len == 2) - elapsed_time = ntohs(*(__be16 *)value); - else - elapsed_time = ntohl(*(__be32 *)value); + if (len == 2) { + __be16 opt_val2 = get_unaligned((__be16 *)value); + elapsed_time = ntohs(opt_val2); + } else { + opt_val = get_unaligned((__be32 *)value); + elapsed_time = ntohl(opt_val); + } if (elapsed_time > opt_recv->dccpor_elapsed_time) opt_recv->dccpor_elapsed_time = elapsed_time; From 24c667db59a9cc4caaafe4f77f6f4ef85899a454 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 24 Oct 2007 10:53:01 -0200 Subject: [PATCH 5/5] [CCID2/3]: Initialisation assignments of 0 are redundant Assigning initial values of `0' is redundant when loading a new CCID structure, since in net/dccp/ccid.c the entire CCID structure is zeroed out prior to initialisation in ccid_new(): struct ccid { struct ccid_operations *ccid_ops; char ccid_priv[0]; }; // ... if (rx) { memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size); if (ccid->ccid_ops->ccid_hc_rx_init != NULL && ccid->ccid_ops->ccid_hc_rx_init(ccid, sk) != 0) goto out_free_ccid; } else { memset(ccid + 1, 0, ccid_ops->ccid_hc_tx_obj_size); /* analogous to the rx case */ } This patch therefore removes the redundant assignments. Thanks to Arnaldo for the inspiration. Signed-off-by: Gerrit Renker Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/ccids/ccid2.c | 4 ---- net/dccp/ccids/ccid3.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 426008e3b7e3..d694656b8800 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -750,20 +750,16 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) */ hctx->ccid2hctx_ssthresh = ~0; hctx->ccid2hctx_numdupack = 3; - hctx->ccid2hctx_seqbufc = 0; /* XXX init ~ to window size... */ if (ccid2_hc_tx_alloc_seq(hctx)) return -ENOMEM; - hctx->ccid2hctx_sent = 0; hctx->ccid2hctx_rto = 3 * HZ; ccid2_change_srtt(hctx, -1); hctx->ccid2hctx_rttvar = -1; - hctx->ccid2hctx_lastrtt = 0; hctx->ccid2hctx_rpdupack = -1; hctx->ccid2hctx_last_cong = jiffies; - hctx->ccid2hctx_high_ack = 0; hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 05f263e9160d..19b33586333d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -604,8 +604,6 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); - hctx->ccid3hctx_s = 0; - hctx->ccid3hctx_rtt = 0; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); @@ -966,8 +964,6 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real(); - hcrx->ccid3hcrx_s = 0; - hcrx->ccid3hcrx_rtt = 0; return 0; }