Merge branch 'tcp-improve-delivered-counts-in-SCM_TSTAMP_ACK'

Yousuk Seung says:

====================
tcp: improve delivered counts in SCM_TSTAMP_ACK

Currently delivered and delivered_ce in OPT_STATS of SCM_TSTAMP_ACK do
not fully reflect the current ack being timestamped. Also, they are not
in sync: the delivered count includes packets being sacked and some of
those cumulatively acked, but delivered_ce includes none.

This patch series updates tp->delivered and tp->delivered_ce together to
keep them in sync. It also moves the generation of SCM_TSTAMP_ACK later in
tcp_clean_rtx_queue() so that it reflects the packets cumulatively acked up
until the current skb for sack-enabled connections.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-06-27 17:41:27 -07:00
commit 491f14db45
1 changed file with 39 additions and 20 deletions

View File

@ -962,6 +962,15 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
} }
} }
/* Account for newly delivered packets on @tp.
 *
 * @delivered is always added to tp->delivered. When @ece_ack is true the
 * same amount is also added to tp->delivered_ce, so that both counters
 * are advanced by a single call.
 */
static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
				bool ece_ack)
{
	tp->delivered += delivered;

	/* Without ECE on this ACK there is nothing more to count. */
	if (!ece_ack)
		return;

	tp->delivered_ce += delivered;
}
/* This procedure tags the retransmission queue when SACKs arrive. /* This procedure tags the retransmission queue when SACKs arrive.
* *
* We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@ -1138,6 +1147,7 @@ struct tcp_sacktag_state {
struct rate_sample *rate; struct rate_sample *rate;
int flag; int flag;
unsigned int mss_now; unsigned int mss_now;
u32 sack_delivered;
}; };
/* Check if skb is fully within the SACK block. In presence of GSO skbs, /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@ -1258,7 +1268,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
sacked |= TCPCB_SACKED_ACKED; sacked |= TCPCB_SACKED_ACKED;
state->flag |= FLAG_DATA_SACKED; state->flag |= FLAG_DATA_SACKED;
tp->sacked_out += pcount; tp->sacked_out += pcount;
tp->delivered += pcount; /* Out-of-order packets delivered */ /* Out-of-order packets delivered */
state->sack_delivered += pcount;
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */ /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
if (tp->lost_skb_hint && if (tp->lost_skb_hint &&
@ -1684,7 +1695,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
num_sacks, prior_snd_una); num_sacks, prior_snd_una);
if (found_dup_sack) { if (found_dup_sack) {
state->flag |= FLAG_DSACKING_ACK; state->flag |= FLAG_DSACKING_ACK;
tp->delivered++; /* A spurious retransmission is delivered */ /* A spurious retransmission is delivered */
state->sack_delivered++;
} }
/* Eliminate too old ACKs, but take into /* Eliminate too old ACKs, but take into
@ -1893,7 +1905,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
/* Emulate SACKs for SACKless connection: account for a new dupack. */ /* Emulate SACKs for SACKless connection: account for a new dupack. */
static void tcp_add_reno_sack(struct sock *sk, int num_dupack) static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
{ {
if (num_dupack) { if (num_dupack) {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
@ -1904,20 +1916,21 @@ static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
tcp_check_reno_reordering(sk, 0); tcp_check_reno_reordering(sk, 0);
delivered = tp->sacked_out - prior_sacked; delivered = tp->sacked_out - prior_sacked;
if (delivered > 0) if (delivered > 0)
tp->delivered += delivered; tcp_count_delivered(tp, delivered, ece_ack);
tcp_verify_left_out(tp); tcp_verify_left_out(tp);
} }
} }
/* Account for ACK, ACKing some data in Reno Recovery phase. */ /* Account for ACK, ACKing some data in Reno Recovery phase. */
static void tcp_remove_reno_sacks(struct sock *sk, int acked) static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
if (acked > 0) { if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */ /* One ACK acked hole. The rest eat duplicate ACKs. */
tp->delivered += max_t(int, acked - tp->sacked_out, 1); tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
ece_ack);
if (acked - 1 >= tp->sacked_out) if (acked - 1 >= tp->sacked_out)
tp->sacked_out = 0; tp->sacked_out = 0;
else else
@ -2697,7 +2710,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
* delivered. Lower inflight to clock out (re)tranmissions. * delivered. Lower inflight to clock out (re)tranmissions.
*/ */
if (after(tp->snd_nxt, tp->high_seq) && num_dupack) if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
tcp_add_reno_sack(sk, num_dupack); tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
else if (flag & FLAG_SND_UNA_ADVANCED) else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp); tcp_reset_reno_sack(tp);
} }
@ -2779,6 +2792,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag; int fast_rexmit = 0, flag = *ack_flag;
bool ece_ack = flag & FLAG_ECE;
bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) && bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
tcp_force_fast_retransmit(sk)); tcp_force_fast_retransmit(sk));
@ -2787,7 +2801,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
/* Now state machine starts. /* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */ * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
if (flag & FLAG_ECE) if (ece_ack)
tp->prior_ssthresh = 0; tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */ /* B. In all the states check for reneging SACKs. */
@ -2828,7 +2842,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
case TCP_CA_Recovery: case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (!(flag & FLAG_SND_UNA_ADVANCED)) {
if (tcp_is_reno(tp)) if (tcp_is_reno(tp))
tcp_add_reno_sack(sk, num_dupack); tcp_add_reno_sack(sk, num_dupack, ece_ack);
} else { } else {
if (tcp_try_undo_partial(sk, prior_snd_una)) if (tcp_try_undo_partial(sk, prior_snd_una))
return; return;
@ -2853,7 +2867,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
if (tcp_is_reno(tp)) { if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED) if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp); tcp_reset_reno_sack(tp);
tcp_add_reno_sack(sk, num_dupack); tcp_add_reno_sack(sk, num_dupack, ece_ack);
} }
if (icsk->icsk_ca_state <= TCP_CA_Disorder) if (icsk->icsk_ca_state <= TCP_CA_Disorder)
@ -2877,7 +2891,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
} }
/* Otherwise enter Recovery state */ /* Otherwise enter Recovery state */
tcp_enter_recovery(sk, (flag & FLAG_ECE)); tcp_enter_recovery(sk, ece_ack);
fast_rexmit = 1; fast_rexmit = 1;
} }
@ -3053,7 +3067,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
*/ */
static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
u32 prior_snd_una, u32 prior_snd_una,
struct tcp_sacktag_state *sack) struct tcp_sacktag_state *sack, bool ece_ack)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_connection_sock *icsk = inet_csk(sk);
u64 first_ackt, last_ackt; u64 first_ackt, last_ackt;
@ -3078,8 +3092,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
u8 sacked = scb->sacked; u8 sacked = scb->sacked;
u32 acked_pcount; u32 acked_pcount;
tcp_ack_tstamp(sk, skb, prior_snd_una);
/* Determine how many packets and what bytes were acked, tso and else */ /* Determine how many packets and what bytes were acked, tso and else */
if (after(scb->end_seq, tp->snd_una)) { if (after(scb->end_seq, tp->snd_una)) {
if (tcp_skb_pcount(skb) == 1 || if (tcp_skb_pcount(skb) == 1 ||
@ -3114,7 +3126,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (sacked & TCPCB_SACKED_ACKED) { if (sacked & TCPCB_SACKED_ACKED) {
tp->sacked_out -= acked_pcount; tp->sacked_out -= acked_pcount;
} else if (tcp_is_sack(tp)) { } else if (tcp_is_sack(tp)) {
tp->delivered += acked_pcount; tcp_count_delivered(tp, acked_pcount, ece_ack);
if (!tcp_skb_spurious_retrans(tp, skb)) if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq, tcp_rack_advance(tp, sacked, scb->end_seq,
tcp_skb_timestamp_us(skb)); tcp_skb_timestamp_us(skb));
@ -3143,6 +3155,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (!fully_acked) if (!fully_acked)
break; break;
tcp_ack_tstamp(sk, skb, prior_snd_una);
next = skb_rb_next(skb); next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint)) if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL; tp->retransmit_skb_hint = NULL;
@ -3191,7 +3205,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
} }
if (tcp_is_reno(tp)) { if (tcp_is_reno(tp)) {
tcp_remove_reno_sacks(sk, pkts_acked); tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
/* If any of the cumulatively ACKed segments was /* If any of the cumulatively ACKed segments was
* retransmitted, non-SACK case cannot confirm that * retransmitted, non-SACK case cannot confirm that
@ -3558,10 +3572,9 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
delivered = tp->delivered - prior_delivered; delivered = tp->delivered - prior_delivered;
NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered); NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
if (flag & FLAG_ECE) { if (flag & FLAG_ECE)
tp->delivered_ce += delivered;
NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered); NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
}
return delivered; return delivered;
} }
@ -3585,6 +3598,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sack_state.first_sackt = 0; sack_state.first_sackt = 0;
sack_state.rate = &rs; sack_state.rate = &rs;
sack_state.sack_delivered = 0;
/* We very likely will need to access rtx queue. */ /* We very likely will need to access rtx queue. */
prefetch(sk->tcp_rtx_queue.rb_node); prefetch(sk->tcp_rtx_queue.rb_node);
@ -3660,6 +3674,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
ack_ev_flags |= CA_ACK_ECE; ack_ev_flags |= CA_ACK_ECE;
} }
if (sack_state.sack_delivered)
tcp_count_delivered(tp, sack_state.sack_delivered,
flag & FLAG_ECE);
if (flag & FLAG_WIN_UPDATE) if (flag & FLAG_WIN_UPDATE)
ack_ev_flags |= CA_ACK_WIN_UPDATE; ack_ev_flags |= CA_ACK_WIN_UPDATE;
@ -3685,7 +3703,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto no_queue; goto no_queue;
/* See if we can take anything off of the retransmit queue. */ /* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state); flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
flag & FLAG_ECE);
tcp_rack_update_reo_wnd(sk, &rs); tcp_rack_update_reo_wnd(sk, &rs);