tcp: prefer packet timing to TS-ECR for RTT
Prefer packet timings to TS-ECR for RTT measurements when both sources are available, because broken middle-boxes and remote peers can return packets with corrupted TS-ECR fields. Similarly, most congestion controls that require RTT signals favor timing-based sources as well. Also check for bad TS-ECR values to avoid RTT blow-ups; this has happened on production web servers. Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
375fe02c91
commit
5b08e47caf
|
@@ -591,7 +591,6 @@ extern void tcp_initialize_rcv_mss(struct sock *sk);
|
||||||
extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
|
extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
|
||||||
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
|
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
|
||||||
extern void tcp_mtup_init(struct sock *sk);
|
extern void tcp_mtup_init(struct sock *sk);
|
||||||
extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
|
|
||||||
extern void tcp_init_buffer_space(struct sock *sk);
|
extern void tcp_init_buffer_space(struct sock *sk);
|
||||||
|
|
||||||
static inline void tcp_bound_rto(const struct sock *sk)
|
static inline void tcp_bound_rto(const struct sock *sk)
|
||||||
|
|
|
@@ -2792,65 +2792,36 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
|
||||||
tcp_xmit_retransmit_queue(sk);
|
tcp_xmit_retransmit_queue(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
|
static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
|
||||||
|
s32 seq_rtt)
|
||||||
{
|
{
|
||||||
tcp_rtt_estimator(sk, seq_rtt);
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
tcp_set_rto(sk);
|
|
||||||
inet_csk(sk)->icsk_backoff = 0;
|
/* Prefer RTT measured from ACK's timing to TS-ECR. This is because
|
||||||
}
|
* broken middle-boxes or peers may corrupt TS-ECR fields. But
|
||||||
EXPORT_SYMBOL(tcp_valid_rtt_meas);
|
* Karn's algorithm forbids taking RTT if some retransmitted data
|
||||||
|
* is acked (RFC6298).
|
||||||
|
*/
|
||||||
|
if (flag & FLAG_RETRANS_DATA_ACKED)
|
||||||
|
seq_rtt = -1;
|
||||||
|
|
||||||
/* Read draft-ietf-tcplw-high-performance before mucking
|
|
||||||
* with this code. (Supersedes RFC1323)
|
|
||||||
*/
|
|
||||||
static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
|
|
||||||
{
|
|
||||||
/* RTTM Rule: A TSecr value received in a segment is used to
|
/* RTTM Rule: A TSecr value received in a segment is used to
|
||||||
* update the averaged RTT measurement only if the segment
|
* update the averaged RTT measurement only if the segment
|
||||||
* acknowledges some new data, i.e., only if it advances the
|
* acknowledges some new data, i.e., only if it advances the
|
||||||
* left edge of the send window.
|
* left edge of the send window.
|
||||||
*
|
|
||||||
* See draft-ietf-tcplw-high-performance-00, section 3.3.
|
* See draft-ietf-tcplw-high-performance-00, section 3.3.
|
||||||
* 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
|
|
||||||
*
|
|
||||||
* Changed: reset backoff as soon as we see the first valid sample.
|
|
||||||
* If we do not, we get strongly overestimated rto. With timestamps
|
|
||||||
* samples are accepted even from very old segments: f.e., when rtt=1
|
|
||||||
* increases to 8, we retransmit 5 times and after 8 seconds delayed
|
|
||||||
* answer arrives rto becomes 120 seconds! If at least one of segments
|
|
||||||
* in window is lost... Voila. --ANK (010210)
|
|
||||||
*/
|
*/
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
|
||||||
|
seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
|
||||||
|
|
||||||
tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
|
if (seq_rtt < 0)
|
||||||
}
|
|
||||||
|
|
||||||
static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
|
|
||||||
{
|
|
||||||
/* We don't have a timestamp. Can only use
|
|
||||||
* packets that are not retransmitted to determine
|
|
||||||
* rtt estimates. Also, we must not reset the
|
|
||||||
* backoff for rto until we get a non-retransmitted
|
|
||||||
* packet. This allows us to deal with a situation
|
|
||||||
* where the network delay has increased suddenly.
|
|
||||||
* I.e. Karn's algorithm. (SIGCOMM '87, p5.)
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (flag & FLAG_RETRANS_DATA_ACKED)
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
tcp_valid_rtt_meas(sk, seq_rtt);
|
tcp_rtt_estimator(sk, seq_rtt);
|
||||||
}
|
tcp_set_rto(sk);
|
||||||
|
|
||||||
static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
|
/* RFC6298: only reset backoff on valid RTT measurement. */
|
||||||
const s32 seq_rtt)
|
inet_csk(sk)->icsk_backoff = 0;
|
||||||
{
|
|
||||||
const struct tcp_sock *tp = tcp_sk(sk);
|
|
||||||
/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
|
|
||||||
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
|
|
||||||
tcp_ack_saw_tstamp(sk, flag);
|
|
||||||
else if (seq_rtt >= 0)
|
|
||||||
tcp_ack_no_tstamp(sk, seq_rtt, flag);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
|
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
|
||||||
|
@@ -2989,8 +2960,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
|
||||||
if (sacked & TCPCB_SACKED_RETRANS)
|
if (sacked & TCPCB_SACKED_RETRANS)
|
||||||
tp->retrans_out -= acked_pcount;
|
tp->retrans_out -= acked_pcount;
|
||||||
flag |= FLAG_RETRANS_DATA_ACKED;
|
flag |= FLAG_RETRANS_DATA_ACKED;
|
||||||
ca_seq_rtt = -1;
|
|
||||||
seq_rtt = -1;
|
|
||||||
} else {
|
} else {
|
||||||
ca_seq_rtt = now - scb->when;
|
ca_seq_rtt = now - scb->when;
|
||||||
last_ackt = skb->tstamp;
|
last_ackt = skb->tstamp;
|
||||||
|
|
Loading…
Reference in New Issue