Commit 7c657876 authored by Arnaldo Carvalho de Melo, committed by David S. Miller

[DCCP]: Initial implementation

Development to this point was done on a subversion repository at:

http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/

This repository will be kept at this site for the foreseeable future,
so that interested parties can see the history of this code,
attributions, etc.

If I ever decide to take this offline I'll provide the full history at
some other suitable place.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c4365c92
This diff is collapsed.
...@@ -32,6 +32,7 @@ enum { ...@@ -32,6 +32,7 @@ enum {
IPPROTO_PUP = 12, /* PUP protocol */ IPPROTO_PUP = 12, /* PUP protocol */
IPPROTO_UDP = 17, /* User Datagram Protocol */ IPPROTO_UDP = 17, /* User Datagram Protocol */
IPPROTO_IDP = 22, /* XNS IDP protocol */ IPPROTO_IDP = 22, /* XNS IDP protocol */
IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
IPPROTO_RSVP = 46, /* RSVP protocol */ IPPROTO_RSVP = 46, /* RSVP protocol */
IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
......
...@@ -84,6 +84,7 @@ enum sock_type { ...@@ -84,6 +84,7 @@ enum sock_type {
SOCK_RAW = 3, SOCK_RAW = 3,
SOCK_RDM = 4, SOCK_RDM = 4,
SOCK_SEQPACKET = 5, SOCK_SEQPACKET = 5,
SOCK_DCCP = 6,
SOCK_PACKET = 10, SOCK_PACKET = 10,
}; };
......
...@@ -271,6 +271,7 @@ struct ucred { ...@@ -271,6 +271,7 @@ struct ucred {
#define SOL_IRDA 266 #define SOL_IRDA 266
#define SOL_NETBEUI 267 #define SOL_NETBEUI 267
#define SOL_LLC 268 #define SOL_LLC 268
#define SOL_DCCP 269
/* IPX options */ /* IPX options */
#define IPX_TYPE 1 #define IPX_TYPE 1
......
...@@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig" ...@@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig"
endif endif
source "net/dccp/Kconfig"
source "net/sctp/Kconfig" source "net/sctp/Kconfig"
source "net/atm/Kconfig" source "net/atm/Kconfig"
source "net/bridge/Kconfig" source "net/bridge/Kconfig"
......
...@@ -42,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/ ...@@ -42,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_ECONET) += econet/ obj-$(CONFIG_ECONET) += econet/
obj-$(CONFIG_VLAN_8021Q) += 8021q/ obj-$(CONFIG_VLAN_8021Q) += 8021q/
obj-$(CONFIG_IP_DCCP) += dccp/
obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IP_SCTP) += sctp/
ifeq ($(CONFIG_NET),y) ifeq ($(CONFIG_NET),y)
......
menu "DCCP Configuration (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
config IP_DCCP
tristate "The DCCP Protocol (EXPERIMENTAL)"
---help---
Datagram Congestion Control Protocol
From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
The Datagram Congestion Control Protocol (DCCP) is a transport
protocol that implements bidirectional, unicast connections of
congestion-controlled, unreliable datagrams. It should be suitable
for use by applications such as streaming media, Internet telephony,
and on-line games.
To compile this protocol support as a module, choose M here: the
module will be called dccp.
If in doubt, say N.
source "net/dccp/ccids/Kconfig"
endmenu
obj-$(CONFIG_IP_DCCP) += dccp.o
dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o
obj-y += ccids/
/*
* net/dccp/ccid.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* CCID infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "ccid.h"
/* Table of registered CCIDs, indexed by ccid_id; a NULL slot means "not registered". */
static struct ccid *ccids[CCID_MAX];
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
static atomic_t ccids_lockct = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(ccids_lock);
/*
 * The strategy is: modifications of the ccids vector are short, do not sleep
 * and are very rare, but read access should be free of any exclusive locks.
 */
/*
 * Acquire the ccids table for modification.
 *
 * Returns with ccids_lock held. While the reader count (ccids_lockct) is
 * non-zero the spinlock is dropped, the CPU is yielded and the lock is
 * taken again, so the function only returns after it has seen a zero
 * reader count with the spinlock held.
 */
static void ccids_write_lock(void)
{
spin_lock(&ccids_lock);
while (atomic_read(&ccids_lockct) != 0) {
/* Readers are still active: release the lock while waiting for them. */
spin_unlock(&ccids_lock);
yield();
spin_lock(&ccids_lock);
}
}
/* Release the ccids table after a modification (drops ccids_lock). */
static inline void ccids_write_unlock(void)
{
spin_unlock(&ccids_lock);
}
/*
 * Enter a read-side section on the ccids table.
 *
 * The reader count is raised first, then the caller waits until ccids_lock
 * is not held, i.e. until a writer that may currently own the spinlock has
 * released it. The spinlock itself is never taken by readers.
 */
static inline void ccids_read_lock(void)
{
atomic_inc(&ccids_lockct);
spin_unlock_wait(&ccids_lock);
}
/* Leave a read-side section: drop the reader count taken by ccids_read_lock(). */
static inline void ccids_read_unlock(void)
{
atomic_dec(&ccids_lockct);
}
#else
/* Neither CONFIG_SMP nor CONFIG_PREEMPT is set: the lock helpers compile to nothing. */
#define ccids_write_lock() do { } while(0)
#define ccids_write_unlock() do { } while(0)
#define ccids_read_lock() do { } while(0)
#define ccids_read_unlock() do { } while(0)
#endif
int ccid_register(struct ccid *ccid)
{
int err;
if (ccid->ccid_init == NULL)
return -1;
ccids_write_lock();
err = -EEXIST;
if (ccids[ccid->ccid_id] == NULL) {
ccids[ccid->ccid_id] = ccid;
err = 0;
}
ccids_write_unlock();
if (err == 0)
pr_info("CCID: Registered CCID %d (%s)\n",
ccid->ccid_id, ccid->ccid_name);
return err;
}
EXPORT_SYMBOL_GPL(ccid_register);
int ccid_unregister(struct ccid *ccid)
{
ccids_write_lock();
ccids[ccid->ccid_id] = NULL;
ccids_write_unlock();
pr_info("CCID: Unregistered CCID %d (%s)\n",
ccid->ccid_id, ccid->ccid_name);
return 0;
}
EXPORT_SYMBOL_GPL(ccid_unregister);
struct ccid *ccid_init(unsigned char id, struct sock *sk)
{
struct ccid *ccid;
#ifdef CONFIG_KMOD
if (ccids[id] == NULL)
request_module("net-dccp-ccid-%d", id);
#endif
ccids_read_lock();
ccid = ccids[id];
if (ccid == NULL)
goto out;
if (!try_module_get(ccid->ccid_owner))
goto out_err;
if (ccid->ccid_init(sk) != 0)
goto out_module_put;
out:
ccids_read_unlock();
return ccid;
out_module_put:
module_put(ccid->ccid_owner);
out_err:
ccid = NULL;
goto out;
}
EXPORT_SYMBOL_GPL(ccid_init);
void ccid_exit(struct ccid *ccid, struct sock *sk)
{
if (ccid == NULL)
return;
ccids_read_lock();
if (ccids[ccid->ccid_id] != NULL) {
if (ccid->ccid_exit != NULL)
ccid->ccid_exit(sk);
module_put(ccid->ccid_owner);
}
ccids_read_unlock();
}
EXPORT_SYMBOL_GPL(ccid_exit);
#ifndef _CCID_H
#define _CCID_H
/*
* net/dccp/ccid.h
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* CCID infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <net/sock.h>
#include <linux/dccp.h>
#include <linux/list.h>
#include <linux/module.h>
#define CCID_MAX 255
/*
 * Descriptor of one congestion control ID (CCID) implementation.
 *
 * ccid_init is mandatory (ccid_register() rejects a descriptor without it);
 * ccid_exit and all ccid_hc_* hooks are optional: their callers check them
 * for NULL before invoking them.
 */
struct ccid {
/* Index of this CCID in the table of registered CCIDs. */
unsigned char ccid_id;
/* Name printed when the CCID is registered or unregistered. */
const char *ccid_name;
/* Module that gets a reference for every user of this CCID. */
struct module *ccid_owner;
/* Run by ccid_init(); a non-zero return makes ccid_init() fail. */
int (*ccid_init)(struct sock *sk);
/* Run by ccid_exit(). */
void (*ccid_exit)(struct sock *sk);
/* Per-socket set-up of the RX / TX half connection; non-zero means failure. */
int (*ccid_hc_rx_init)(struct sock *sk);
int (*ccid_hc_tx_init)(struct sock *sk);
/* Per-socket tear-down of the RX / TX half connection. */
void (*ccid_hc_rx_exit)(struct sock *sk);
void (*ccid_hc_tx_exit)(struct sock *sk);
/* RX half connection: packet received. */
void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb);
/* RX half connection: parse one option (presumably CCID specific - TODO confirm). */
int (*ccid_hc_rx_parse_options)(struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
/* Add RX / TX half connection options to an skb. */
void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb);
void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb);
/* TX half connection: packet received. */
void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb);
/* TX half connection: parse one option (presumably CCID specific - TODO confirm). */
int (*ccid_hc_tx_parse_options)(struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
/* TX half connection: consulted for a packet to send; may fill in *delay. */
int (*ccid_hc_tx_send_packet)(struct sock *sk,
struct sk_buff *skb, int len,
long *delay);
/* TX half connection: notification about a sent packet. */
void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len);
};
extern int ccid_register(struct ccid *ccid);
extern int ccid_unregister(struct ccid *ccid);
extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
extern void ccid_exit(struct ccid *ccid, struct sock *sk);
/*
 * Take an additional reference on the module that owns @ccid.
 * Uses __module_get(), so no failure is reported to the caller.
 */
static inline void __ccid_get(struct ccid *ccid)
{
__module_get(ccid->ccid_owner);
}
/*
 * Forward to the CCID's optional ccid_hc_tx_send_packet hook.
 * Returns the hook's result, or 0 when the CCID does not provide the hook.
 */
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
					 struct sk_buff *skb, int len,
					 long *delay)
{
	if (ccid->ccid_hc_tx_send_packet == NULL)
		return 0;

	return ccid->ccid_hc_tx_send_packet(sk, skb, len, delay);
}
/* Forward to the CCID's optional ccid_hc_tx_packet_sent hook; no-op without it. */
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
					  int more, int len)
{
	if (ccid->ccid_hc_tx_packet_sent == NULL)
		return;

	ccid->ccid_hc_tx_packet_sent(sk, more, len);
}
/*
 * Forward to the CCID's optional ccid_hc_rx_init hook.
 * Returns the hook's result, or 0 when the CCID does not provide the hook.
 */
static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
{
	if (ccid->ccid_hc_rx_init == NULL)
		return 0;

	return ccid->ccid_hc_rx_init(sk);
}
/*
 * Forward to the CCID's optional ccid_hc_tx_init hook.
 * Returns the hook's result, or 0 when the CCID does not provide the hook.
 */
static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
	if (ccid->ccid_hc_tx_init == NULL)
		return 0;

	return ccid->ccid_hc_tx_init(sk);
}
/* Forward to the CCID's optional ccid_hc_rx_exit hook; no-op without it. */
static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
{
	if (ccid->ccid_hc_rx_exit == NULL)
		return;

	ccid->ccid_hc_rx_exit(sk);
}
/* Forward to the CCID's optional ccid_hc_tx_exit hook; no-op without it. */
static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
{
	if (ccid->ccid_hc_tx_exit == NULL)
		return;

	ccid->ccid_hc_tx_exit(sk);
}
/* Forward to the CCID's optional ccid_hc_rx_packet_recv hook; no-op without it. */
static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
					  struct sk_buff *skb)
{
	if (ccid->ccid_hc_rx_packet_recv == NULL)
		return;

	ccid->ccid_hc_rx_packet_recv(sk, skb);
}
/* Forward to the CCID's optional ccid_hc_tx_packet_recv hook; no-op without it. */
static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
					  struct sk_buff *skb)
{
	if (ccid->ccid_hc_tx_packet_recv == NULL)
		return;

	ccid->ccid_hc_tx_packet_recv(sk, skb);
}
/*
 * Forward to the CCID's optional ccid_hc_tx_parse_options hook.
 * Returns the hook's result, or 0 when the CCID does not provide the hook.
 */
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
					   unsigned char option,
					   unsigned char len, u16 idx,
					   unsigned char *value)
{
	if (ccid->ccid_hc_tx_parse_options == NULL)
		return 0;

	return ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value);
}
/*
 * Forward to the CCID's optional ccid_hc_rx_parse_options hook.
 * Returns the hook's result, or 0 when the CCID does not provide the hook.
 */
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
					   unsigned char option,
					   unsigned char len, u16 idx,
					   unsigned char *value)
{
	if (ccid->ccid_hc_rx_parse_options == NULL)
		return 0;

	return ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
}
/* Forward to the CCID's optional ccid_hc_tx_insert_options hook; no-op without it. */
static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
					     struct sk_buff *skb)
{
	if (ccid->ccid_hc_tx_insert_options == NULL)
		return;

	ccid->ccid_hc_tx_insert_options(sk, skb);
}
/* Forward to the CCID's optional ccid_hc_rx_insert_options hook; no-op without it. */
static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
					     struct sk_buff *skb)
{
	if (ccid->ccid_hc_rx_insert_options == NULL)
		return;

	ccid->ccid_hc_rx_insert_options(sk, skb);
}
#endif /* _CCID_H */
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
depends on IP_DCCP && EXPERIMENTAL
config IP_DCCP_CCID3
tristate "CCID3 (TFRC) (EXPERIMENTAL)"
depends on IP_DCCP
---help---
CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
be reasonably fair when competing for bandwidth with TCP-like flows,
where a flow is "reasonably fair" if its sending rate is generally
within a factor of two of the sending rate of a TCP flow under the
same conditions. However, TFRC has a much lower variation of
throughput over time compared with TCP, which makes CCID 3 more
suitable than CCID 2 for applications such as streaming media where a
relatively smooth sending rate is of importance.
CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
congestion control algorithms were initially described in RFC 3448.
This text was extracted from draft-ietf-dccp-spec-11.txt.
If in doubt, say M.
endmenu
obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
dccp_ccid3-y := ccid3.o
This diff is collapsed.
/*
* net/dccp/ccids/ccid3.h
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
* or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* Changes to meet Linux coding standards, to make it meet latest ccid3 draft
* and to make it work as a loadable module in the DCCP stack written by
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
*
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _DCCP_CCID3_H_
#define _DCCP_CCID3_H_
#include <linux/types.h>
#include <linux/list.h>
#include <linux/timer.h>
/*
 * One entry of a CCID3 sender-side history list (presumably the list headed
 * by ccid3hctx_hist - TODO confirm against ccid3.c).
 */
struct ccid3_tx_hist_entry {
/* List linkage. */
struct list_head ccid3htx_node;
/* 48-bit sequence number, 8-bit window counter and a 1-bit "sent" flag. */
u64 ccid3htx_seqno:48,
ccid3htx_win_count:8,
ccid3htx_sent:1;
/* Timestamp associated with this entry. */
struct timeval ccid3htx_tstamp;
};
/*
 * Option values collected from a received packet, embedded in
 * struct ccid3_hc_tx_sock as ccid3hctx_options_received.
 */
struct ccid3_options_received {
/* 48-bit sequence number plus a 16-bit loss intervals index. */
u64 ccid3or_seqno:48,
ccid3or_loss_intervals_idx:16;
/* Length of the loss intervals data. */
u16 ccid3or_loss_intervals_len;
/* Loss event rate and receive rate as carried by the options. */
u32 ccid3or_loss_event_rate;
u32 ccid3or_receive_rate;
};
/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block
*
* @ccid3hctx_state - Sender state
* @ccid3hctx_x - Current sending rate
* @ccid3hctx_x_recv - Receive rate
* @ccid3hctx_x_calc - Calculated send (?) rate
* @ccid3hctx_s - Packet size
* @ccid3hctx_rtt - Estimate of current round trip time in usecs
* @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
* @ccid3hctx_last_win_count - Last window counter sent
* @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent
* @ccid3hctx_no_feedback_timer - Handle to no feedback timer
* @ccid3hctx_idle - FIXME
* @ccid3hctx_t_ld - Time last doubled during slow start
* @ccid3hctx_t_nom - Nominal send time of next packet
* @ccid3hctx_t_ipi - Interpacket (send) interval
* @ccid3hctx_delta - Send timer delta
* @ccid3hctx_hist - Packet history
* @ccid3hctx_options_received - Option values taken from a received packet
*/
struct ccid3_hc_tx_sock {
u32 ccid3hctx_x;
u32 ccid3hctx_x_recv;
u32 ccid3hctx_x_calc;
u16 ccid3hctx_s;
u32 ccid3hctx_rtt;
u32 ccid3hctx_p;
u8 ccid3hctx_state;
u8 ccid3hctx_last_win_count;
u8 ccid3hctx_idle;
struct timeval ccid3hctx_t_last_win_count;
struct timer_list ccid3hctx_no_feedback_timer;
struct timeval ccid3hctx_t_ld;
struct timeval ccid3hctx_t_nom;
u32 ccid3hctx_t_ipi;
u32 ccid3hctx_delta;
struct list_head ccid3hctx_hist;
struct ccid3_options_received ccid3hctx_options_received;
};
/*
 * One entry of a CCID3 loss interval history list (presumably the list
 * headed by ccid3hcrx_loss_interval_hist - TODO confirm against ccid3.c).
 */
struct ccid3_loss_interval_hist_entry {
/* List linkage. */
struct list_head ccid3lih_node;
/* 48-bit sequence number and 4-bit window counter. */
u64 ccid3lih_seqno:48,
ccid3lih_win_count:4;
/* Interval value for this entry. */
u32 ccid3lih_interval;
};
/*
 * One entry of a CCID3 receiver-side history list (presumably the list
 * headed by ccid3hcrx_hist - TODO confirm against ccid3.c).
 */
struct ccid3_rx_hist_entry {
/* List linkage. */
struct list_head ccid3hrx_node;
/* 48-bit sequence number, 4-bit window counter and 4-bit type. */
u64 ccid3hrx_seqno:48,
ccid3hrx_win_count:4,
ccid3hrx_type:4;
u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */
/* Timestamp associated with this entry. */
struct timeval ccid3hrx_tstamp;
};
/*
 * CCID3 receiver half connection state; reached through
 * dccps_hc_rx_ccid_private by the ccid3_hc_rx_field() accessor.
 * NOTE(review): field meanings are inferred from their names and from the
 * sender-side documentation - confirm against ccid3.c.
 */
struct ccid3_hc_rx_sock {
/* 48-bit sequence number, 8-bit state and 4-bit last counter value. */
u64 ccid3hcrx_seqno_last_counter:48,
ccid3hcrx_state:8,
ccid3hcrx_last_counter:4;
unsigned long ccid3hcrx_rtt;
u32 ccid3hcrx_p;
u32 ccid3hcrx_bytes_recv;
struct timeval ccid3hcrx_tstamp_last_feedback;
struct timeval ccid3hcrx_tstamp_last_ack;
/* List heads: packet history and loss interval history. */
struct list_head ccid3hcrx_hist;
struct list_head ccid3hcrx_loss_interval_hist;
u16 ccid3hcrx_s;
u32 ccid3hcrx_pinv;
u32 ccid3hcrx_elapsed_time;
u32 ccid3hcrx_x_recv;
};
/*
 * Accessors for the CCID3 private areas hanging off a DCCP socket.
 *
 * ccid3_hc_tx_field(s, field) reads ccid3hctx_<field> from the area pointed
 * to by s->dccps_hc_tx_ccid_private, ccid3_hc_rx_field(s, field) reads
 * ccid3hcrx_<field> from s->dccps_hc_rx_ccid_private. Both evaluate to 0
 * when the private pointer is NULL.
 *
 * The argument s is parenthesized so that any pointer expression (e.g.
 * "p + 1") can be passed; note that s is still evaluated twice.
 */
#define ccid3_hc_tx_field(s, field) \
	((s)->dccps_hc_tx_ccid_private == NULL ? 0 : \
	 ((struct ccid3_hc_tx_sock *)(s)->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
#define ccid3_hc_rx_field(s, field) \
	((s)->dccps_hc_rx_ccid_private == NULL ? 0 : \
	 ((struct ccid3_hc_rx_sock *)(s)->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
#endif /* _DCCP_CCID3_H_ */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* net/dccp/minisocks.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
#include "ccid.h"
#include "dccp.h"
/*
 * Placeholder for TIME_WAIT handling: for now it only emits a debug message
 * and moves @sk to @state. @timeo is accepted but not used.
 */
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
/* FIXME: Implement */
dccp_pr_debug("Want to help? Start here\n");
dccp_set_state(sk, state);
}
/*
 * This is for handling early-kills of TIME_WAIT sockets.
 * Currently it logs a debug message and kills @tw right away via
 * __inet_twsk_kill() on the DCCP hash tables.
 */
void dccp_tw_deschedule(struct inet_timewait_sock *tw)
{
dccp_pr_debug("Want to help? Start here\n");
__inet_twsk_kill(tw, &dccp_hashinfo);
}
/*
 * dccp_create_openreq_child - create the child socket for an accepted request
 * @sk: listening (parent) socket that is cloned
 * @req: request sock describing the connection being set up
 * @skb: packet that triggered the creation (currently unused)
 *
 * Clones @sk, marks the clone as server side, optionally allocates its ack
 * vector, initialises both CCID half connections and seeds the sequence
 * number state from @req. Returns the new socket, or NULL if cloning, the
 * ack vector allocation or the CCID initialisation fails.
 */
struct sock *dccp_create_openreq_child(struct sock *sk,
				       const struct request_sock *req,
				       const struct sk_buff *skb)
{
	/*
	 * Step 3: Process LISTEN state
	 *
	 * // Generate a new socket and switch to that socket
	 * Set S := new socket for this port pair
	 */
	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
	const struct dccp_request_sock *dreq;
	struct inet_connection_sock *newicsk;
	struct dccp_sock *newdp;

	if (newsk == NULL)
		return NULL;

	dreq = dccp_rsk(req);
	/*
	 * Must be taken from the child: using the parent here would set
	 * icsk_rto on the listening socket and leave the child untouched.
	 */
	newicsk = inet_csk(newsk);
	newdp = dccp_sk(newsk);

	newdp->dccps_hc_rx_ackpkts = NULL;
	newdp->dccps_role = DCCP_ROLE_SERVER;
	newicsk->icsk_rto = TCP_TIMEOUT_INIT;

	if (newdp->dccps_options.dccpo_send_ack_vector) {
		newdp->dccps_hc_rx_ackpkts =
			dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, GFP_ATOMIC);
		if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
			goto out_free;
	}

	if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 ||
		     ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) {
		dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
		ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
		ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
		goto out_free;
	}

	/*
	 * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone
	 * copied the master sock and left the CCID pointers for this child,
	 * that is why we do the __ccid_get calls.
	 */
	__ccid_get(newdp->dccps_hc_rx_ccid);
	__ccid_get(newdp->dccps_hc_tx_ccid);

	/*
	 * Step 3: Process LISTEN state
	 *
	 * Choose S.ISS (initial seqno) or set from Init Cookie
	 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 */

	/* See dccp_v4_conn_request */
	newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;

	newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
	dccp_update_gsr(newsk, dreq->dreq_isr);

	newdp->dccps_iss = dreq->dreq_iss;
	dccp_update_gss(newsk, dreq->dreq_iss);

	dccp_init_xmit_timers(newsk);

	DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
	return newsk;

out_free:
	/*
	 * It is still a raw copy of the parent, so invalidate the
	 * destructor and do a plain sk_free().
	 */
	newsk->sk_destruct = NULL;
	sk_free(newsk);
	return NULL;
}
/*
 * Process an incoming packet for RESPOND sockets represented
 * as a request_sock.
 *
 * @sk: listening socket that owns @req
 * @skb: incoming packet
 * @req: pending request the packet was matched to
 * @prev: predecessor of @req, passed on to the request queue helpers
 *
 * A REQUEST with a newer sequence number is answered with another RESPONSE;
 * any REQUEST is then discarded (NULL). An ACK/DATAACK that acknowledges
 * dreq_iss creates the child socket, moves @req to the accept queue and
 * returns the child. Everything else drops @req, sending a RESET unless the
 * packet itself is a RESET, and returns NULL.
 */
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
			    struct request_sock *req,
			    struct request_sock **prev)
{
	struct sock *child = NULL;

	/* Check for retransmitted REQUEST */
	if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
		if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) {
			struct dccp_request_sock *dreq = dccp_rsk(req);

			dccp_pr_debug("Retransmitted REQUEST\n");
			/* Send another RESPONSE packet */
			dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
			dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq);
			req->rsk_ops->rtx_syn_ack(sk, req, NULL);
		}
		/* Network Duplicate, discard packet */
		return NULL;
	}

	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;

	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
	    dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
		goto drop;

	/* Invalid ACK */
	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
		/*
		 * %llu needs an unsigned long long argument; the sequence
		 * number type need not be one on every architecture, so
		 * cast explicitly.
		 */
		dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n",
			      (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
			      (unsigned long long)dccp_rsk(req)->dreq_iss);
		goto drop;
	}

	child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
	if (child == NULL)
		goto listen_overflow;

	/* FIXME: deal with options */

	inet_csk_reqsk_queue_unlink(sk, req, prev);
	inet_csk_reqsk_queue_removed(sk, req);
	inet_csk_reqsk_queue_add(sk, req, child);
out:
	return child;
listen_overflow:
	dccp_pr_debug("listen_overflow!\n");
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
		req->rsk_ops->send_reset(skb);

	inet_csk_reqsk_queue_drop(sk, req, prev);
	goto out;
}
/*
 * Hand @skb to a freshly created child socket.
 *
 * If the child is not owned by a user context the packet is processed
 * right away, and the parent is woken up when this moves the child out of
 * the RESPOND state. Otherwise the packet is put on the child's backlog.
 * In both cases the child is unlocked and one reference on it is dropped.
 *
 * Returns the result of dccp_rcv_state_process(), or 0 when the packet was
 * backlogged.
 */
int dccp_child_process(struct sock *parent, struct sock *child,
		       struct sk_buff *skb)
{
	int rc = 0;

	if (sock_owned_by_user(child)) {
		/*
		 * Alas, it is possible again, because we do the lookup
		 * in the main socket hash table and the lock on the
		 * listening socket does not protect us any more.
		 */
		sk_add_backlog(child, skb);
	} else {
		const int old_state = child->sk_state;

		rc = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len);

		/* Wakeup parent, send SIGIO */
		if (old_state == DCCP_RESPOND && child->sk_state != old_state)
			parent->sk_data_ready(parent, 0);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return rc;
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* net/dccp/timer.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include "dccp.h"
static void dccp_write_timer(unsigned long data);
static void dccp_keepalive_timer(unsigned long data);
static void dccp_delack_timer(unsigned long data);
/*
 * Install the DCCP handlers for the connection socket timers of @sk:
 * dccp_write_timer, dccp_delack_timer and dccp_keepalive_timer.
 */
void dccp_init_xmit_timers(struct sock *sk)
{
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
/*
 * Abort the connection after too many retransmissions: report the pending
 * soft error (or ETIMEDOUT if there is none), send a RESET with code
 * "aborted", finish the socket and account the abort.
 */
static void dccp_write_err(struct sock *sk)
{
	if (sk->sk_err_soft)
		sk->sk_err = sk->sk_err_soft;
	else
		sk->sk_err = ETIMEDOUT;

	sk->sk_error_report(sk);

	dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
	dccp_done(sk);
	DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
}
/*
 * A write timeout has occurred. Process the after effects.
 *
 * Gives routing a hint that the current path may be bad and checks the
 * retransmission count against the limit for the socket state. Returns 1
 * if the limit was reached and the connection has been aborted via
 * dccp_write_err(), 0 if retransmitting may continue.
 */
static int dccp_write_timeout(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const int handshaking = sk->sk_state == DCCP_REQUESTING ||
				sk->sk_state == DCCP_PARTOPEN;
	int retry_until;

	if (handshaking) {
		if (icsk->icsk_retransmits != 0)
			dst_negative_advice(&sk->sk_dst_cache);

		/* FIXME: the fallback of 3 stands in for sysctl_tcp_syn_retries */
		retry_until = icsk->icsk_syn_retries;
		if (retry_until == 0)
			retry_until = 3;
	} else {
		/* FIXME: 5 stands in for sysctl_tcp_retries1 */
		if (icsk->icsk_retransmits >= 5) {
			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
			   hole detection. :-(

			   This is the place to do it. It is not done. I do not want
			   to do it. It is disgusting. It does not work in any
			   case. Let me cite the same draft, which requires us
			   to implement this:

   "The one security concern raised by this memo is that ICMP black holes
   are often caused by over-zealous security administrators who block
   all ICMP messages. It is vitally important that those who design and
   deploy security systems understand the impact of strict filtering on
   upper-layer protocols. The safest web site in the world is worthless
   if most TCP implementations cannot transfer data from it. It would
   be far nicer to have all of the black holes fixed rather than fixing
   all of the TCP implementations."

			   Golden words :-).
			 */
			dst_negative_advice(&sk->sk_dst_cache);
		}

		/* FIXME: 15 stands in for sysctl_tcp_retries2 */
		retry_until = 15;
		/*
		 * FIXME: see tcp_write_timeout and tcp_out_of_resources
		 */
	}

	if (icsk->icsk_retransmits < retry_until)
		return 0;

	/* Has it gone just too far? */
	dccp_write_err(sk);
	return 1;
}
/*
 * The delayed ACK timer really expired: clear the pending flag and, if an
 * ACK is still scheduled, adjust the ACK timeout (ATO) and send the ACK.
 */
static void dccp_delack_expired(struct sock *sk,
				struct inet_connection_sock *icsk)
{
	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (!inet_csk_ack_scheduled(sk))
		return;

	if (icsk->icsk_ack.pingpong) {
		/* Delayed ACK missed: leave pingpong mode and
		 * deflate ATO.
		 */
		icsk->icsk_ack.pingpong = 0;
		icsk->icsk_ack.ato = TCP_ATO_MIN;
	} else {
		/* Delayed ACK missed: inflate ATO. */
		icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
	}

	dccp_send_ack(sk);
	NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
}

/*
 * Delayed ACK timer handler; @data is the socket pointer.
 * This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff.
 *
 * The socket is unlocked and one reference on it is dropped on every path.
 */
static void dccp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);

	bh_lock_sock(sk);

	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		icsk->icsk_ack.blocked = 1;
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
		sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
	} else if (sk->sk_state != DCCP_CLOSED &&
		   (icsk->icsk_ack.pending & ICSK_ACK_TIMER)) {
		if (time_after(icsk->icsk_ack.timeout, jiffies)) {
			/* Fired early: re-arm for the real deadline. */
			sk_reset_timer(sk, &icsk->icsk_delack_timer,
				       icsk->icsk_ack.timeout);
		} else {
			dccp_delack_expired(sk, icsk);
		}
	}

	bh_unlock_sock(sk);
	sock_put(sk);
}
/*
 * The DCCP retransmit timer.
 *
 * Gives up if dccp_write_timeout() aborted the connection, otherwise
 * retransmits sk->sk_send_head and re-arms the retransmit timer: with
 * exponential backoff after a successful retransmission, without backoff
 * when the retransmission itself failed.
 */
static void dccp_retransmit_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/*
	 * sk->sk_send_head has to have one skb with
	 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
	 * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
	 * (PARTOPEN timer), etc).
	 */
	BUG_TRAP(sk->sk_send_head != NULL);

	/*
	 * More than 4MSL (8 minutes) has passed, a RESET(aborted) was
	 * sent, no need to retransmit, this sock is dead.
	 */
	if (dccp_write_timeout(sk))
		return;

	/*
	 * We want to know the number of packets retransmitted, not the
	 * total number of retransmissions of clones of original packets.
	 */
	if (icsk->icsk_retransmits == 0) {
		DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
	}

	if (dccp_retransmit_skb(sk, sk->sk_send_head) >= 0) {
		icsk->icsk_backoff++;
		icsk->icsk_retransmits++;

		icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
					  TCP_RTO_MAX);

		/* FIXME: 3 stands in for sysctl_dccp_retries1 */
		if (icsk->icsk_retransmits > 3)
			__sk_dst_reset(sk);
	} else {
		/*
		 * Retransmission failed because of local congestion,
		 * do not backoff.
		 */
		if (icsk->icsk_retransmits == 0)
			icsk->icsk_retransmits = 1;

		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  min(icsk->icsk_rto,
					      TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);
	}
}
/*
 * Write (retransmit) timer handler; @data is the socket pointer.
 *
 * Retries shortly if the socket is owned by a user context, re-arms the
 * timer if it fired before icsk_timeout, and otherwise consumes the pending
 * event and runs the retransmit timer for ICSK_TIME_RETRANS. The socket is
 * unlocked and one reference on it is dropped on every path.
 */
static void dccp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);

	bh_lock_sock(sk);

	if (sock_owned_by_user(sk)) {
		/* Try again later */
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
			       jiffies + (HZ / 20));
	} else if (sk->sk_state != DCCP_CLOSED && icsk->icsk_pending) {
		if (time_after(icsk->icsk_timeout, jiffies)) {
			sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
				       icsk->icsk_timeout);
		} else {
			const int pending_event = icsk->icsk_pending;

			icsk->icsk_pending = 0;
			if (pending_event == ICSK_TIME_RETRANS)
				dccp_retransmit_timer(sk);
		}
	}

	bh_unlock_sock(sk);
	sock_put(sk);
}
/*
 * Timer for listening sockets: prune the queue of pending requests.
 * The retry limit is the socket's icsk_syn_retries, or TCP_SYNACK_RETRIES
 * when that is zero (FIXME: should become sysctl_tcp_synack_retries).
 */
static void dccp_response_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int retry_limit = icsk->icsk_syn_retries;

	if (retry_limit == 0)
		retry_limit = TCP_SYNACK_RETRIES;

	reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
			  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, retry_limit);
}
/*
 * Keepalive timer handler; @data is the socket pointer.
 *
 * Only acts when the socket is not in use: a socket owned by a user context
 * gets the timer re-armed for a little later, a listening socket has its
 * request queue serviced. The socket is unlocked and one reference on it
 * is dropped on every path.
 */
static void dccp_keepalive_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	bh_lock_sock(sk);

	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		inet_csk_reset_keepalive_timer(sk, HZ / 20);
	} else if (sk->sk_state == DCCP_LISTEN) {
		dccp_response_timer(sk);
	}

	bh_unlock_sock(sk);
	sock_put(sk);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment