2006-03-21 09:41:47 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
*/
|
|
|
|
#ifndef _DCCP_CCID2_H_
|
|
|
|
#define _DCCP_CCID2_H_
|
|
|
|
|
2006-03-21 11:21:44 +08:00
|
|
|
#include <linux/timer.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include "../ccid.h"
|
2010-08-30 03:23:10 +08:00
|
|
|
#include "../dccp.h"
|
|
|
|
|
|
|
|
/*
 * CCID-2 timestamping faces the same issues as TCP timestamping.
 * Hence we reuse/share as much of the code as possible: ccid2_time_stamp
 * is a plain alias for TCP's tcp_time_stamp.
 */
#define ccid2_time_stamp	tcp_time_stamp
|
|
|
|
|
2007-11-25 08:04:35 +08:00
|
|
|
/* NUMDUPACK parameter from RFC 4341, p. 6 */
#define NUMDUPACK	3
|
2006-03-21 11:21:44 +08:00
|
|
|
|
2006-03-21 09:41:47 +08:00
|
|
|
struct ccid2_seq {
|
|
|
|
u64 ccid2s_seq;
|
2010-08-30 03:23:10 +08:00
|
|
|
u32 ccid2s_sent;
|
2006-03-21 09:41:47 +08:00
|
|
|
int ccid2s_acked;
|
|
|
|
struct ccid2_seq *ccid2s_prev;
|
|
|
|
struct ccid2_seq *ccid2s_next;
|
|
|
|
};
|
|
|
|
|
2006-11-26 11:07:50 +08:00
|
|
|
/*
 * NOTE(review): presumably the number of struct ccid2_seq entries held by a
 * single sequence buffer - confirm against the allocation code in ccid2.c.
 */
#define CCID2_SEQBUF_LEN 1024
/* Number of slots in the tx_seqbuf[] array of struct ccid2_hc_tx_sock. */
#define CCID2_SEQBUF_MAX 128
|
|
|
|
|
2009-09-12 15:47:01 +08:00
|
|
|
/**
|
|
|
|
* struct ccid2_hc_tx_sock - CCID2 TX half connection
|
2009-10-05 08:53:10 +08:00
|
|
|
* @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
|
|
|
|
* @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
|
dccp ccid-2: Replace broken RTT estimator with better algorithm
The current CCID-2 RTT estimator code is in parts broken and lags behind the
suggestions in RFC2988 of using scaled variants for SRTT/RTTVAR.
That code is replaced by the present patch, which reuses the Linux TCP RTT
estimator code.
Further details:
----------------
1. The minimum RTO of previously one second has been replaced with TCP's, since
RFC4341, sec. 5 says that the minimum of 1 sec. (suggested in RFC2988, 2.4)
is not necessary. Instead, the TCP_RTO_MIN is used, which agrees with DCCP's
concept of a default RTT (RFC 4340, 3.4).
2. The maximum RTO has been set to DCCP_RTO_MAX (64 sec), which agrees with
RFC2988, (2.5).
3. De-inlined the function ccid2_new_ack().
4. Added a FIXME: the RTT is sampled several times per Ack Vector, which will
give the wrong estimate. It should be replaced with one sample per Ack.
However, at the moment this can not be resolved easily, since
- it depends on TX history code (which also needs some work),
- the cleanest solution is not to use the `sent' time at all (saves 4 bytes
per entry) and use DCCP timestamps / elapsed time to estimated the RTT,
which however is non-trivial to get right (but needs to be done).
Reasons for reusing the Linux TCP estimator algorithm:
------------------------------------------------------
Some time was spent to find a better alternative, using basic RFC2988 as a first
step. Further analysis and experimentation showed that the Linux TCP RTO
estimator is superior to a basic RFC2988 implementation. A summary is on
http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid2/rto_estimator/
In addition, this estimator fared well in a recent empirical evaluation:
Rewaskar, Sushant, Jasleen Kaur and F. Donelson Smith.
A Performance Study of Loss Detection/Recovery in Real-world TCP
Implementations. Proceedings of 15th IEEE International
Conference on Network Protocols (ICNP-07), 2007.
Thus there is significant benefit in reusing the existing TCP code.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-08-23 03:41:40 +08:00
|
|
|
* @tx_srtt: smoothed RTT estimate, scaled by 2^3
|
|
|
|
* @tx_mdev: smoothed RTT variation, scaled by 2^2
|
|
|
|
* @tx_mdev_max: maximum of @mdev during one flight
|
|
|
|
* @tx_rttvar: moving average/maximum of @mdev_max
|
|
|
|
* @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
|
|
|
|
* @tx_rtt_seq: to decay RTTVAR at most once per flight
|
dccp ccid-2: Perform congestion-window validation
CCID-2's cwnd increases like TCP during slow-start, which has implications for
* the local Sequence Window value (should be > cwnd),
* the Ack Ratio value.
Hence an exponential growth, if it does not reflect the actual network
conditions, can quickly lead to instability.
This patch adds congestion-window validation (RFC2861) to CCID-2:
* cwnd is constrained if the sender is application limited;
* cwnd is reduced after a long idle period, as suggested in the '90 paper
by Van Jacobson, in RFC 2581 (sec. 4.1);
* cwnd is never reduced below the RFC 3390 initial window.
As marked in the comments, the code is actually almost a direct copy of the
TCP congestion-window-validation algorithms. By continuing this work, it may
in future be possible to use the TCP code (not possible at the moment).
The mechanism can be turned off using a module parameter. Sampling of the
currently-used window (moving-maximum) is however done constantly; this is
used to determine the expected window, which can be exploited to regulate
DCCP's Sequence Window value.
This patch also sets slow-start-after-idle (RFC 4341, 5.1), i.e. it behaves like
TCP when net.ipv4.tcp_slow_start_after_idle = 1.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
2011-07-03 23:55:03 +08:00
|
|
|
* @tx_cwnd_used: actually used cwnd, W_used of RFC 2861
|
|
|
|
* @tx_expected_wnd: moving average of @tx_cwnd_used
|
|
|
|
* @tx_cwnd_stamp: to track idle periods in CWV
|
|
|
|
* @tx_lsndtime: last time (in jiffies) a data packet was sent
|
2009-10-05 08:53:10 +08:00
|
|
|
* @tx_rpseq: last consecutive seqno
|
|
|
|
* @tx_rpdupack: dupacks since rpseq
|
2010-11-15 00:26:13 +08:00
|
|
|
* @tx_av_chunks: list of Ack Vectors received on current skb
|
2009-09-12 15:47:01 +08:00
|
|
|
*/
|
2006-03-21 09:41:47 +08:00
|
|
|
struct ccid2_hc_tx_sock {
|
2009-10-05 08:53:10 +08:00
|
|
|
u32 tx_cwnd;
|
|
|
|
u32 tx_ssthresh;
|
|
|
|
u32 tx_pipe;
|
|
|
|
u32 tx_packets_acked;
|
|
|
|
struct ccid2_seq *tx_seqbuf[CCID2_SEQBUF_MAX];
|
|
|
|
int tx_seqbufc;
|
|
|
|
struct ccid2_seq *tx_seqh;
|
|
|
|
struct ccid2_seq *tx_seqt;
|
dccp ccid-2: Replace broken RTT estimator with better algorithm
The current CCID-2 RTT estimator code is in parts broken and lags behind the
suggestions in RFC2988 of using scaled variants for SRTT/RTTVAR.
That code is replaced by the present patch, which reuses the Linux TCP RTT
estimator code.
Further details:
----------------
1. The minimum RTO of previously one second has been replaced with TCP's, since
RFC4341, sec. 5 says that the minimum of 1 sec. (suggested in RFC2988, 2.4)
is not necessary. Instead, the TCP_RTO_MIN is used, which agrees with DCCP's
concept of a default RTT (RFC 4340, 3.4).
2. The maximum RTO has been set to DCCP_RTO_MAX (64 sec), which agrees with
RFC2988, (2.5).
3. De-inlined the function ccid2_new_ack().
4. Added a FIXME: the RTT is sampled several times per Ack Vector, which will
give the wrong estimate. It should be replaced with one sample per Ack.
However, at the moment this can not be resolved easily, since
- it depends on TX history code (which also needs some work),
- the cleanest solution is not to use the `sent' time at all (saves 4 bytes
per entry) and use DCCP timestamps / elapsed time to estimated the RTT,
which however is non-trivial to get right (but needs to be done).
Reasons for reusing the Linux TCP estimator algorithm:
------------------------------------------------------
Some time was spent to find a better alternative, using basic RFC2988 as a first
step. Further analysis and experimentation showed that the Linux TCP RTO
estimator is superior to a basic RFC2988 implementation. A summary is on
http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid2/rto_estimator/
In addition, this estimator fared well in a recent empirical evaluation:
Rewaskar, Sushant, Jasleen Kaur and F. Donelson Smith.
A Performance Study of Loss Detection/Recovery in Real-world TCP
Implementations. Proceedings of 15th IEEE International
Conference on Network Protocols (ICNP-07), 2007.
Thus there is significant benefit in reusing the existing TCP code.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-08-23 03:41:40 +08:00
|
|
|
|
|
|
|
/* RTT measurement: variables/principles are the same as in TCP */
|
|
|
|
u32 tx_srtt,
|
|
|
|
tx_mdev,
|
|
|
|
tx_mdev_max,
|
|
|
|
tx_rttvar,
|
|
|
|
tx_rto;
|
|
|
|
u64 tx_rtt_seq:48;
|
2009-10-05 08:53:10 +08:00
|
|
|
struct timer_list tx_rtotimer;
|
dccp ccid-2: Replace broken RTT estimator with better algorithm
The current CCID-2 RTT estimator code is in parts broken and lags behind the
suggestions in RFC2988 of using scaled variants for SRTT/RTTVAR.
That code is replaced by the present patch, which reuses the Linux TCP RTT
estimator code.
Further details:
----------------
1. The minimum RTO of previously one second has been replaced with TCP's, since
RFC4341, sec. 5 says that the minimum of 1 sec. (suggested in RFC2988, 2.4)
is not necessary. Instead, the TCP_RTO_MIN is used, which agrees with DCCP's
concept of a default RTT (RFC 4340, 3.4).
2. The maximum RTO has been set to DCCP_RTO_MAX (64 sec), which agrees with
RFC2988, (2.5).
3. De-inlined the function ccid2_new_ack().
4. Added a FIXME: the RTT is sampled several times per Ack Vector, which will
give the wrong estimate. It should be replaced with one sample per Ack.
However, at the moment this can not be resolved easily, since
- it depends on TX history code (which also needs some work),
- the cleanest solution is not to use the `sent' time at all (saves 4 bytes
per entry) and use DCCP timestamps / elapsed time to estimated the RTT,
which however is non-trivial to get right (but needs to be done).
Reasons for reusing the Linux TCP estimator algorithm:
------------------------------------------------------
Some time was spent to find a better alternative, using basic RFC2988 as a first
step. Further analysis and experimentation showed that the Linux TCP RTO
estimator is superior to a basic RFC2988 implementation. A summary is on
http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid2/rto_estimator/
In addition, this estimator fared well in a recent empirical evaluation:
Rewaskar, Sushant, Jasleen Kaur and F. Donelson Smith.
A Performance Study of Loss Detection/Recovery in Real-world TCP
Implementations. Proceedings of 15th IEEE International
Conference on Network Protocols (ICNP-07), 2007.
Thus there is significant benefit in reusing the existing TCP code.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-08-23 03:41:40 +08:00
|
|
|
|
dccp ccid-2: Perform congestion-window validation
CCID-2's cwnd increases like TCP during slow-start, which has implications for
* the local Sequence Window value (should be > cwnd),
* the Ack Ratio value.
Hence an exponential growth, if it does not reflect the actual network
conditions, can quickly lead to instability.
This patch adds congestion-window validation (RFC2861) to CCID-2:
* cwnd is constrained if the sender is application limited;
* cwnd is reduced after a long idle period, as suggested in the '90 paper
by Van Jacobson, in RFC 2581 (sec. 4.1);
* cwnd is never reduced below the RFC 3390 initial window.
As marked in the comments, the code is actually almost a direct copy of the
TCP congestion-window-validation algorithms. By continuing this work, it may
in future be possible to use the TCP code (not possible at the moment).
The mechanism can be turned off using a module parameter. Sampling of the
currently-used window (moving-maximum) is however done constantly; this is
used to determine the expected window, which can be exploited to regulate
DCCP's Sequence Window value.
This patch also sets slow-start-after-idle (RFC 4341, 5.1), i.e. it behaves like
TCP when net.ipv4.tcp_slow_start_after_idle = 1.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
2011-07-03 23:55:03 +08:00
|
|
|
/* Congestion Window validation (optional, RFC 2861) */
|
|
|
|
u32 tx_cwnd_used,
|
|
|
|
tx_expected_wnd,
|
|
|
|
tx_cwnd_stamp,
|
|
|
|
tx_lsndtime;
|
|
|
|
|
2009-10-05 08:53:10 +08:00
|
|
|
u64 tx_rpseq;
|
|
|
|
int tx_rpdupack;
|
2010-08-30 03:23:10 +08:00
|
|
|
u32 tx_last_cong;
|
2009-10-05 08:53:10 +08:00
|
|
|
u64 tx_high_ack;
|
2010-11-15 00:26:13 +08:00
|
|
|
struct list_head tx_av_chunks;
|
2006-03-21 09:41:47 +08:00
|
|
|
};
|
|
|
|
|
2010-10-28 03:16:28 +08:00
|
|
|
static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
|
|
|
|
{
|
|
|
|
return hc->tx_pipe >= hc->tx_cwnd;
|
|
|
|
}
|
|
|
|
|
2011-07-03 23:04:18 +08:00
|
|
|
/*
|
|
|
|
* Convert RFC 3390 larger initial window into an equivalent number of packets.
|
|
|
|
* This is based on the numbers specified in RFC 5681, 3.1.
|
|
|
|
*/
|
|
|
|
static inline u32 rfc3390_bytes_to_packets(const u32 smss)
|
|
|
|
{
|
|
|
|
return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
|
|
|
|
}
|
|
|
|
|
2011-07-03 23:53:12 +08:00
|
|
|
/**
|
|
|
|
* struct ccid2_hc_rx_sock - Receiving end of CCID-2 half-connection
|
|
|
|
* @rx_num_data_pkts: number of data packets received since last feedback
|
|
|
|
*/
|
2006-03-21 09:41:47 +08:00
|
|
|
struct ccid2_hc_rx_sock {
|
2011-07-03 23:53:12 +08:00
|
|
|
u32 rx_num_data_pkts;
|
2006-03-21 09:41:47 +08:00
|
|
|
};
|
|
|
|
|
2006-03-21 11:21:44 +08:00
|
|
|
static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
|
|
|
|
{
|
|
|
|
return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct ccid2_hc_rx_sock *ccid2_hc_rx_sk(const struct sock *sk)
|
|
|
|
{
|
|
|
|
return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
|
|
|
|
}
|
2006-03-21 09:41:47 +08:00
|
|
|
#endif /* _DCCP_CCID2_H_ */
|