Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/net-next-2.6

This commit is contained in:
David S. Miller 2011-08-01 17:37:22 -07:00
commit ebdcc94b4b
6 changed files with 270 additions and 23 deletions

View File

@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
{
struct dccp_sock *dp = dccp_sk(sk);
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);
/*
@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
val = max_ratio;
}
if (val > DCCPF_ACK_RATIO_MAX)
val = DCCPF_ACK_RATIO_MAX;
dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO,
min_t(u32, val, DCCPF_ACK_RATIO_MAX));
}
if (val == dp->dccps_l_ack_ratio)
return;
static void ccid2_check_l_ack_ratio(struct sock *sk)
{
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
ccid2_pr_debug("changing local ack ratio to %u\n", val);
dp->dccps_l_ack_ratio = val;
/*
* After a loss, idle period, application limited period, or RTO we
* need to check that the ack ratio is still less than the congestion
* window. Otherwise, we will send an entire congestion window of
* packets and got no response because we haven't sent ack ratio
* packets yet.
* If the ack ratio does need to be reduced, we reduce it to half of
* the congestion window (or 1 if that's zero) instead of to the
* congestion window. This prevents problems if one ack is lost.
*/
if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd)
ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U);
}
static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
{
dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW,
clamp_val(val, DCCPF_SEQ_WMIN,
DCCPF_SEQ_WMAX));
}
static void ccid2_hc_tx_rto_expire(unsigned long data)
@ -187,6 +205,8 @@ static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
}
hc->tx_cwnd_used = 0;
hc->tx_cwnd_stamp = now;
ccid2_check_l_ack_ratio(sk);
}
/* This borrows the code of tcp_cwnd_restart() */
@ -205,6 +225,8 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
hc->tx_cwnd_stamp = now;
hc->tx_cwnd_used = 0;
ccid2_check_l_ack_ratio(sk);
}
static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
@ -405,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
unsigned int *maxincr)
{
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio;
if (hc->tx_cwnd < hc->tx_ssthresh) {
if (*maxincr > 0 && ++hc->tx_packets_acked == 2) {
if (hc->tx_cwnd < dp->dccps_l_seq_win &&
r_seq_used < dp->dccps_r_seq_win) {
if (hc->tx_cwnd < hc->tx_ssthresh) {
if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) {
hc->tx_cwnd += 1;
*maxincr -= 1;
hc->tx_packets_acked = 0;
}
} else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
hc->tx_cwnd += 1;
*maxincr -= 1;
hc->tx_packets_acked = 0;
}
} else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
hc->tx_cwnd += 1;
hc->tx_packets_acked = 0;
}
/*
* Adjust the local sequence window and the ack ratio to allow about
* 5 times the number of packets in the network (RFC 4340 7.5.2)
*/
if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win)
ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2);
else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2)
ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U);
if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win)
ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2);
else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2)
ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2);
/*
* FIXME: RTT is sampled several times per acknowledgment (for each
* entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
@ -441,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd)
ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
ccid2_check_l_ack_ratio(sk);
}
static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
@ -494,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
if (hc->tx_rpdupack >= NUMDUPACK) {
hc->tx_rpdupack = -1; /* XXX lame */
hc->tx_rpseq = 0;
#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
/*
* FIXME: Ack Congestion Control is broken; in
* the current state instabilities occurred with
* Ack Ratios greater than 1; causing hang-ups
* and long RTO timeouts. This needs to be fixed
* before opening up dynamic changes. -- gerrit
*/
ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
#endif
}
}
}

View File

@ -43,6 +43,12 @@ struct ccid2_seq {
#define CCID2_SEQBUF_LEN 1024
#define CCID2_SEQBUF_MAX 128
/*
* Multiple of congestion window to keep the sequence window at
* (RFC 4340 7.5.2)
*/
#define CCID2_WIN_CHANGE_FACTOR 5
/**
* struct ccid2_hc_tx_sock - CCID2 TX half connection
* @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5

View File

@ -474,6 +474,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
}
extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,

View File

@ -12,6 +12,7 @@
* -----------
* o Feature negotiation is coordinated with connection setup (as in TCP), wild
* changes of parameters of an established connection are not supported.
* o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN.
* o All currently known SP features have 1-byte quantities. If in the future
* extensions of RFCs 4340..42 define features with item lengths larger than
* one byte, a feature-specific extension of the code will be required.
@ -343,6 +344,20 @@ static int __dccp_feat_activate(struct sock *sk, const int idx,
return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
}
/**
* dccp_feat_activate - Activate feature value on socket
* @sk: fully connected DCCP socket (after handshake is complete)
* @feat_num: feature to activate, one of %dccp_feature_numbers
* @local: whether local (1) or remote (0) @feat_num is meant
* @fval: the value (SP or NN) to activate, or NULL to use the default value
* For general use this function is preferable over __dccp_feat_activate().
*/
static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local,
dccp_feat_val const *fval)
{
return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval);
}
/* Test for "Req'd" feature (RFC 4340, 6.4) */
static inline int dccp_feat_must_be_understood(u8 feat_num)
{
@ -650,11 +665,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
return -1;
if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
return -1;
/*
* Enter CHANGING after transmitting the Change option (6.6.2).
*/
if (pos->state == FEAT_INITIALISING)
pos->state = FEAT_CHANGING;
if (skb->sk->sk_state == DCCP_OPEN &&
(opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) {
/*
* Confirms don't get retransmitted (6.6.3) once the
* connection is in state OPEN
*/
dccp_feat_list_pop(pos);
} else {
/*
* Enter CHANGING after transmitting the Change
* option (6.6.2).
*/
if (pos->state == FEAT_INITIALISING)
pos->state = FEAT_CHANGING;
}
}
return 0;
}
@ -730,6 +756,70 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
0, list, len);
}
/**
* dccp_feat_nn_get - Query current/pending value of NN feature
* @sk: DCCP socket of an established connection
* @feat: NN feature number from %dccp_feature_numbers
* For a known NN feature, returns value currently being negotiated, or
* current (confirmed) value if no negotiation is going on.
*/
u64 dccp_feat_nn_get(struct sock *sk, u8 feat)
{
if (dccp_feat_type(feat) == FEAT_NN) {
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_feat_entry *entry;
entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1);
if (entry != NULL)
return entry->val.nn;
switch (feat) {
case DCCPF_ACK_RATIO:
return dp->dccps_l_ack_ratio;
case DCCPF_SEQUENCE_WINDOW:
return dp->dccps_l_seq_win;
}
}
DCCP_BUG("attempt to look up unsupported feature %u", feat);
return 0;
}
EXPORT_SYMBOL_GPL(dccp_feat_nn_get);
/**
* dccp_feat_signal_nn_change - Update NN values for an established connection
* @sk: DCCP socket of an established connection
* @feat: NN feature number from %dccp_feature_numbers
* @nn_val: the new value to use
* This function is used to communicate NN updates out-of-band.
*/
int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val)
{
struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
dccp_feat_val fval = { .nn = nn_val };
struct dccp_feat_entry *entry;
if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
return 0;
if (dccp_feat_type(feat) != FEAT_NN ||
!dccp_feat_is_valid_nn_val(feat, nn_val))
return -EINVAL;
if (nn_val == dccp_feat_nn_get(sk, feat))
return 0; /* already set or negotiation under way */
entry = dccp_feat_list_lookup(fn, feat, 1);
if (entry != NULL) {
dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n",
(unsigned long long)entry->val.nn,
(unsigned long long)nn_val);
dccp_feat_list_pop(entry);
}
inet_csk_schedule_ack(sk);
return dccp_feat_push_change(fn, feat, 1, 0, &fval);
}
EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change);
/*
* Tracking features whose value depend on the choice of CCID
@ -1186,6 +1276,100 @@ static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
: DCCP_RESET_CODE_OPTION_ERROR;
}
/**
* dccp_feat_handle_nn_established - Fast-path reception of NN options
* @sk: socket of an established DCCP connection
* @mandatory: whether @opt was preceded by a Mandatory option
* @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only)
* @feat: NN number, one of %dccp_feature_numbers
* @val: NN value
* @len: length of @val in bytes
* This function combines the functionality of change_recv/confirm_recv, with
* the following differences (reset codes are the same):
* - cleanup after receiving the Confirm;
* - values are directly activated after successful parsing;
* - deliberately restricted to NN features.
* The restriction to NN features is essential since SP features can have non-
* predictable outcomes (depending on the remote configuration), and are inter-
* dependent (CCIDs for instance cause further dependencies).
*/
static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt,
u8 feat, u8 *val, u8 len)
{
struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
const bool local = (opt == DCCPO_CONFIRM_R);
struct dccp_feat_entry *entry;
u8 type = dccp_feat_type(feat);
dccp_feat_val fval;
dccp_feat_print_opt(opt, feat, val, len, mandatory);
/* Ignore non-mandatory unknown and non-NN features */
if (type == FEAT_UNKNOWN) {
if (local && !mandatory)
return 0;
goto fast_path_unknown;
} else if (type != FEAT_NN) {
return 0;
}
/*
* We don't accept empty Confirms, since in fast-path feature
* negotiation the values are enabled immediately after sending
* the Change option.
* Empty Changes on the other hand are invalid (RFC 4340, 6.1).
*/
if (len == 0 || len > sizeof(fval.nn))
goto fast_path_unknown;
if (opt == DCCPO_CHANGE_L) {
fval.nn = dccp_decode_value_var(val, len);
if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
goto fast_path_unknown;
if (dccp_feat_push_confirm(fn, feat, local, &fval) ||
dccp_feat_activate(sk, feat, local, &fval))
return DCCP_RESET_CODE_TOO_BUSY;
/* set the `Ack Pending' flag to piggyback a Confirm */
inet_csk_schedule_ack(sk);
} else if (opt == DCCPO_CONFIRM_R) {
entry = dccp_feat_list_lookup(fn, feat, local);
if (entry == NULL || entry->state != FEAT_CHANGING)
return 0;
fval.nn = dccp_decode_value_var(val, len);
/*
* Just ignore a value that doesn't match our current value.
* If the option changes twice within two RTTs, then at least
* one CONFIRM will be received for the old value after a
* new CHANGE was sent.
*/
if (fval.nn != entry->val.nn)
return 0;
/* Only activate after receiving the Confirm option (6.6.1). */
dccp_feat_activate(sk, feat, local, &fval);
/* It has been confirmed - so remove the entry */
dccp_feat_list_pop(entry);
} else {
DCCP_WARN("Received illegal option %u\n", opt);
goto fast_path_failed;
}
return 0;
fast_path_unknown:
if (!mandatory)
return dccp_push_empty_confirm(fn, feat, local);
fast_path_failed:
return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
: DCCP_RESET_CODE_OPTION_ERROR;
}
/**
* dccp_feat_parse_options - Process Feature-Negotiation Options
* @sk: for general use and used by the client during connection setup
@ -1221,6 +1405,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
val, len, server);
}
break;
/*
* Support for exchanging NN options on an established connection.
*/
case DCCP_OPEN:
case DCCP_PARTOPEN:
return dccp_feat_handle_nn_established(sk, mandatory, opt, feat,
val, len);
}
return 0; /* ignore FN options in all other states */
}

View File

@ -129,6 +129,7 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
extern u64 dccp_decode_value_var(const u8 *bf, const u8 len);
extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat);
extern int dccp_insert_option_mandatory(struct sk_buff *skb);
extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,

View File

@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dp->dccps_rate_last = jiffies;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
dccp_init_xmit_timers(sk);