Commit 15a7b507 authored by David S. Miller's avatar David S. Miller

Merge

parents 412523e9 ac65c30d
......@@ -1640,7 +1640,8 @@ flush
Writing to this file results in a flush of the routing cache.
gc_elastic, gc_interval, gc_min_interval, gc_tresh, gc_timeout
gc_elasticity, gc_interval, gc_min_interval, gc_tresh, gc_timeout,
gc_thresh, gc_thresh1, gc_thresh2, gc_thresh3
--------------------------------------------------------------
Values to control the frequency and behavior of the garbage collection
......
......@@ -17,6 +17,16 @@ ip_no_pmtu_disc - BOOLEAN
Disable Path MTU Discovery.
default FALSE
min_pmtu - INTEGER
default 562 - minimum discovered Path MTU
mtu_expires - INTEGER
Time, in seconds, that cached PMTU information is kept.
min_adv_mss - INTEGER
The advertised MSS depends on the first hop route MTU, but will
never be lower than this setting.
IP Fragmentation:
ipfrag_high_thresh - INTEGER
......@@ -345,6 +355,19 @@ tcp_default_win_scale - INTEGER
connections.
Default: 7
tcp_frto - BOOLEAN
Enables F-RTO, an enhanced recovery algorithm for TCP retransmission
timeouts. It is particularly beneficial in wireless environments
where packet loss is typically due to random radio interference
rather than intermediate router congestion.
somaxconn - INTEGER
Limit of TCP listen backlog, known in userspace as SOMAXCONN.
Defaults to 128
IP Variables:
ip_local_port_range - 2 INTEGERS
Defines the local port range that is used by TCP and UDP to
choose the local port. The first number is the first, the
......@@ -586,6 +609,19 @@ arp_ignore - INTEGER
The max value from conf/{all,interface}/arp_ignore is used
when an ARP request is received on the {interface}
app_solicit - INTEGER
The maximum number of probes to send to the user space ARP daemon
via netlink before dropping back to multicast probes (see
mcast_solicit). Defaults to 0.
disable_policy - BOOLEAN
Disable IPSEC policy (SPD) for this interface
disable_xfrm - BOOLEAN
Disable IPSEC encryption on this interface, whatever the policy
tag - INTEGER
Allows you to write a number, which can be used as required.
Default value is 0.
......@@ -804,4 +840,25 @@ bridge-nf-filter-vlan-tagged - BOOLEAN
Default: 1
UNDOCUMENTED:
dev_weight FIXME
discovery_slots FIXME
discovery_timeout FIXME
fast_poll_increase FIXME
ip6_queue_maxlen FIXME
lap_keepalive_time FIXME
lo_cong FIXME
max_baud_rate FIXME
max_dgram_qlen FIXME
max_noreply_time FIXME
max_tx_data_size FIXME
max_tx_window FIXME
min_tx_turn_time FIXME
mod_cong FIXME
no_cong FIXME
no_cong_thresh FIXME
slot_timeout FIXME
warn_noreply_time FIXME
$Id: ip-sysctl.txt,v 1.20 2001/12/13 09:00:18 davem Exp $
......@@ -1004,11 +1004,12 @@ e1000_setup_desc_rings(struct e1000_adapter *adapter)
struct e1000_rx_desc *rx_desc = E1000_RX_DESC(*rxdr, i);
struct sk_buff *skb;
if(!(skb = alloc_skb(E1000_RXBUFFER_2048 + 2, GFP_KERNEL))) {
if(!(skb = alloc_skb(E1000_RXBUFFER_2048 + NET_IP_ALIGN,
GFP_KERNEL))) {
ret_val = 6;
goto err_nomem;
}
skb_reserve(skb, 2);
skb_reserve(skb, NET_IP_ALIGN);
rxdr->buffer_info[i].skb = skb;
rxdr->buffer_info[i].length = E1000_RXBUFFER_2048;
rxdr->buffer_info[i].dma =
......
......@@ -2367,7 +2367,6 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
struct e1000_rx_desc *rx_desc;
struct e1000_buffer *buffer_info;
struct sk_buff *skb;
int reserve_len = 2;
unsigned int i;
i = rx_ring->next_to_use;
......@@ -2376,7 +2375,7 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
while(!buffer_info->skb) {
rx_desc = E1000_RX_DESC(*rx_ring, i);
skb = dev_alloc_skb(adapter->rx_buffer_len + reserve_len);
skb = dev_alloc_skb(adapter->rx_buffer_len + NET_IP_ALIGN);
if(!skb) {
/* Better luck next round */
......@@ -2387,7 +2386,7 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
* this will result in a 16 byte aligned IP header after
* the 14 byte MAC header is removed
*/
skb_reserve(skb, reserve_len);
skb_reserve(skb, NET_IP_ALIGN);
skb->dev = netdev;
......
......@@ -1876,7 +1876,6 @@ static void ixgb_alloc_rx_buffers(struct ixgb_adapter *adapter)
struct ixgb_rx_desc *rx_desc;
struct ixgb_buffer *buffer_info;
struct sk_buff *skb;
int reserve_len = 2;
unsigned int i;
int num_group_tail_writes;
long cleancount;
......@@ -1895,7 +1894,7 @@ static void ixgb_alloc_rx_buffers(struct ixgb_adapter *adapter)
while (--cleancount > 0) {
rx_desc = IXGB_RX_DESC(*rx_ring, i);
skb = dev_alloc_skb(adapter->rx_buffer_len + reserve_len);
skb = dev_alloc_skb(adapter->rx_buffer_len + NET_IP_ALIGN);
if (unlikely(!skb)) {
/* Better luck next round */
......@@ -1906,7 +1905,7 @@ static void ixgb_alloc_rx_buffers(struct ixgb_adapter *adapter)
* this will result in a 16 byte aligned IP header after
* the 14 byte MAC header is removed
*/
skb_reserve(skb, reserve_len);
skb_reserve(skb, NET_IP_ALIGN);
skb->dev = netdev;
......
......@@ -1425,13 +1425,13 @@ int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
goto end;
}
skb = dev_alloc_skb(size + HEADER_ALIGN_LAYER_3);
skb = dev_alloc_skb(size + NET_IP_ALIGN);
if (!skb) {
DBG_PRINT(ERR_DBG, "%s: Out of ", dev->name);
DBG_PRINT(ERR_DBG, "memory to allocate SKBs\n");
return -ENOMEM;
}
skb_reserve(skb, HEADER_ALIGN_LAYER_3);
skb_reserve(skb, NET_IP_ALIGN);
memset(rxdp, 0, sizeof(RxD_t));
rxdp->Buffer0_ptr = pci_map_single
(nic->pdev, skb->data, size, PCI_DMA_FROMDEVICE);
......
......@@ -411,7 +411,6 @@ struct config_param {
#define HEADER_802_2_SIZE 3
#define HEADER_SNAP_SIZE 5
#define HEADER_VLAN_SIZE 4
#define HEADER_ALIGN_LAYER_3 2
#define MIN_MTU 46
#define MAX_PYLD 1500
......
......@@ -279,5 +279,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
(unsigned long)_n_, sizeof(*(ptr))); \
})
/*
* We handle most unaligned accesses in hardware. On the other hand
* unaligned DMA can be very expensive on some ppc64 IO chips (it does
* powers of 2 writes until it reaches sufficient alignment).
*
* Based on this we disable the IP header alignment in network drivers.
*/
#define NET_IP_ALIGN 0
#endif /* __KERNEL__ */
#endif
......@@ -20,6 +20,7 @@
#include <linux/socket.h>
#include <linux/types.h>
#endif
#include <linux/compiler.h>
#include <linux/atmapi.h>
#include <linux/atmsap.h>
#include <linux/atmioc.h>
......
......@@ -816,6 +816,30 @@ static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
skb->tail += len;
}
/*
* CPUs often take a performance hit when accessing unaligned memory
* locations. The actual performance hit varies, it can be small if the
* hardware handles it or large if we have to take an exception and fix it
* in software.
*
* Since an ethernet header is 14 bytes, network drivers often end up with
* the IP header at an unaligned offset. The IP header can be aligned by
* shifting the start of the packet by 2 bytes. Drivers should do this
* with:
*
* skb_reserve(skb, NET_IP_ALIGN);
*
* The downside to this alignment of the IP header is that the DMA is now
* unaligned. On some architectures the cost of an unaligned DMA is high
* and this cost outweighs the gains made by aligning the IP header.
*
* Since this trade off varies between architectures, we allow NET_IP_ALIGN
* to be overridden.
*/
#ifndef NET_IP_ALIGN
#define NET_IP_ALIGN 2
#endif
extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
......
......@@ -413,6 +413,21 @@ static inline int sk_acceptq_is_full(struct sock *sk)
return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
}
/*
 * Compute minimal free write space needed to queue new packets.
 *
 * Returns half of sk->sk_wmem_queued.  sk_stream_write_space() compares
 * sk_stream_wspace() against this value before waking writers.
 */
static inline int sk_stream_min_wspace(struct sock *sk)
{
return sk->sk_wmem_queued / 2;
}
/*
 * Free write space of a stream socket, computed as
 * sk->sk_sndbuf - sk->sk_wmem_queued.
 */
static inline int sk_stream_wspace(struct sock *sk)
{
return sk->sk_sndbuf - sk->sk_wmem_queued;
}
extern void sk_stream_write_space(struct sock *sk);
/* The per-socket spinlock must be held here. */
#define sk_add_backlog(__sk, __skb) \
do { if (!(__sk)->sk_backlog.tail) { \
......@@ -902,6 +917,11 @@ sk_dst_check(struct sock *sk, u32 cookie)
return dst;
}
/*
 * Charge @skb to @sk: skb->truesize is taken out of the socket's
 * sk_forward_alloc and added to its sk_wmem_queued.
 */
static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
{
	/* The two counters are independent; both move by skb->truesize. */
	sk->sk_forward_alloc -= skb->truesize;
	sk->sk_wmem_queued   += skb->truesize;
}
/*
* Queue a received datagram if it will fit. Stream and sequenced
......
......@@ -870,7 +870,6 @@ extern void tcp_close(struct sock *sk,
long timeout);
extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
extern void tcp_write_space(struct sock *sk);
extern int tcp_getsockopt(struct sock *sk, int level,
int optname,
......@@ -1195,21 +1194,6 @@ struct tcp_skb_cb {
#include <net/tcp_ecn.h>
/*
 * Compute minimal free write space needed to queue new packets.
 *
 * Returns half of sk->sk_wmem_queued.
 */
static inline int tcp_min_write_space(struct sock *sk)
{
return sk->sk_wmem_queued / 2;
}
/*
 * Free write space of a TCP socket, computed as
 * sk->sk_sndbuf - sk->sk_wmem_queued.
 */
static inline int tcp_wspace(struct sock *sk)
{
return sk->sk_sndbuf - sk->sk_wmem_queued;
}
/* This determines how many packets are "in the network" to the best
* of our knowledge. In many cases it is conservative, but where
* detailed information is available from the receiver (via SACK
......@@ -1899,12 +1883,6 @@ static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
/*
 * Charge @skb to @sk: skb->truesize is taken out of the socket's
 * sk_forward_alloc and added to its sk_wmem_queued.
 */
static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb)
{
	/* The two counters are independent; both move by skb->truesize. */
	sk->sk_forward_alloc -= skb->truesize;
	sk->sk_wmem_queued   += skb->truesize;
}
extern void __tcp_mem_reclaim(struct sock *sk);
extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
......
......@@ -2,7 +2,7 @@
# Makefile for the Linux networking core.
#
obj-y := sock.o skbuff.o iovec.o datagram.o scm.o
obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
......
/*
* SUCS NET3:
*
* Generic stream handling routines. These are generic for most
* protocols. Even IP. Tonight 8-).
* This is used because TCP, LLC (others too) layer all have mostly
* identical sendmsg() and recvmsg() code.
* So we (will) share it here.
*
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* (from old tcp.c code)
* Alan Cox <alan@redhat.com> (Borrowed comments 8-))
*/
#include <linux/module.h>
#include <linux/net.h>
#include <linux/signal.h>
#include <linux/wait.h>
#include <net/sock.h>
/**
 * sk_stream_write_space - stream socket write_space callback.
 * @sk: socket
 *
 * Does nothing unless sk_stream_wspace() has reached
 * sk_stream_min_wspace() and @sk has a struct socket attached.
 * Otherwise it clears SOCK_NOSPACE on that socket, wakes any waiter on
 * sk->sk_sleep, and - provided SEND_SHUTDOWN is not set in
 * sk->sk_shutdown - signals the fasync list with POLL_OUT.
 */
void sk_stream_write_space(struct sock *sk)
{
	struct socket *sock = sk->sk_socket;

	/* Too little room so far, or no socket to notify. */
	if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk) || !sock)
		return;

	clear_bit(SOCK_NOSPACE, &sock->flags);

	/* Only issue the wakeup when someone is actually queued. */
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);

	/* No async notification once the send side has been shut down. */
	if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
		sock_wake_async(sock, 2, POLL_OUT);
}
EXPORT_SYMBOL(sk_stream_write_space);
......@@ -201,6 +201,7 @@ static int econet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
return 0;
}
#ifdef CONFIG_ECONET_NATIVE
/*
* Queue a transmit result for the user to be told about.
*/
......@@ -228,7 +229,6 @@ static void tx_result(struct sock *sk, unsigned long cookie, int result)
kfree_skb(skb);
}
#ifdef CONFIG_ECONET_NATIVE
/*
* Called by the Econet hardware driver when a packet transmit
* has completed. Tell the user.
......@@ -255,11 +255,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
struct ec_addr addr;
int err;
unsigned char port, cb;
struct sk_buff *skb;
struct ec_cb *eb;
#ifdef CONFIG_ECONET_NATIVE
unsigned short proto = 0;
#endif
#ifdef CONFIG_ECONET_AUNUDP
struct msghdr udpmsg;
struct iovec iov[msg->msg_iovlen+1];
......@@ -316,6 +311,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
{
/* Real hardware Econet. We're not worthy etc. */
#ifdef CONFIG_ECONET_NATIVE
struct ec_cb *eb;
struct sk_buff *skb;
unsigned short proto = 0;
dev_hold(dev);
skb = sock_alloc_send_skb(sk, len+LL_RESERVED_SPACE(dev),
......@@ -718,6 +717,7 @@ static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = {
#include <linux/smp_lock.h>
SOCKOPS_WRAP(econet, PF_ECONET);
#ifdef CONFIG_ECONET_AUNUDP
/*
* Find the listening socket, if any, for the given data.
*/
......@@ -762,8 +762,6 @@ static int ec_queue_packet(struct sock *sk, struct sk_buff *skb,
return sock_queue_rcv_skb(sk, skb);
}
#ifdef CONFIG_ECONET_AUNUDP
/*
* Send an AUN protocol response.
*/
......
......@@ -447,7 +447,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
mask |= POLLIN | POLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
if (tcp_wspace(sk) >= tcp_min_write_space(sk)) {
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
set_bit(SOCK_ASYNC_NOSPACE,
......@@ -458,7 +458,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* wspace test but before the flags are set,
* IO signal will be lost.
*/
if (tcp_wspace(sk) >= tcp_min_write_space(sk))
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
mask |= POLLOUT | POLLWRNORM;
}
}
......@@ -469,24 +469,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
return mask;
}
/*
 * TCP socket write_space callback.
 *
 * Does nothing unless tcp_wspace() has reached tcp_min_write_space()
 * and @sk has a struct socket attached.  Otherwise it clears
 * SOCK_NOSPACE, wakes any waiter on sk->sk_sleep, and - provided
 * SEND_SHUTDOWN is not set in sk->sk_shutdown - signals the fasync
 * list with POLL_OUT.
 */
void tcp_write_space(struct sock *sk)
{
	struct socket *sock = sk->sk_socket;

	/* Too little room so far, or no socket to notify. */
	if (tcp_wspace(sk) < tcp_min_write_space(sk) || !sock)
		return;

	clear_bit(SOCK_NOSPACE, &sock->flags);

	/* Only issue the wakeup when someone is actually queued. */
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);

	/* No async notification once the send side has been shut down. */
	if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
		sock_wake_async(sock, 2, POLL_OUT);
}
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
struct tcp_opt *tp = tcp_sk(sk);
......@@ -796,7 +778,7 @@ static inline void skb_entail(struct sock *sk, struct tcp_opt *tp,
TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(skb)->sacked = 0;
__skb_queue_tail(&sk->sk_write_queue, skb);
tcp_charge_skb(sk, skb);
sk_charge_skb(sk, skb);
if (!tp->send_head)
tp->send_head = skb;
else if (tp->nonagle&TCP_NAGLE_PUSH)
......@@ -2635,4 +2617,3 @@ EXPORT_SYMBOL(tcp_shutdown);
EXPORT_SYMBOL(tcp_sockets_allocated);
EXPORT_SYMBOL(tcp_statistics);
EXPORT_SYMBOL(tcp_timewait_cachep);
EXPORT_SYMBOL(tcp_write_space);
......@@ -2081,7 +2081,7 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->sk_state = TCP_CLOSE;
sk->sk_write_space = tcp_write_space;
sk->sk_write_space = sk_stream_write_space;
sk->sk_use_write_queue = 1;
tp->af_specific = &ipv4_specific;
......
......@@ -720,7 +720,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
newsk->sk_callback_lock = RW_LOCK_UNLOCKED;
skb_queue_head_init(&newsk->sk_error_queue);
newsk->sk_write_space = tcp_write_space;
newsk->sk_write_space = sk_stream_write_space;
if ((filter = newsk->sk_filter) != NULL)
sk_filter_charge(newsk, filter);
......
......@@ -326,7 +326,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Advance write_seq and place onto the write_queue. */
tp->write_seq = TCP_SKB_CB(skb)->end_seq;
__skb_queue_tail(&sk->sk_write_queue, skb);
tcp_charge_skb(sk, skb);
sk_charge_skb(sk, skb);
/* Queue it, remembering where we must start sending. */
if (tp->send_head == NULL)
......@@ -439,7 +439,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
buff = tcp_alloc_skb(sk, nsize, GFP_ATOMIC);
if (buff == NULL)
return -ENOMEM; /* We'll just try again later. */
tcp_charge_skb(sk, buff);
sk_charge_skb(sk, buff);
/* Correct the sequence numbers. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
......@@ -1169,7 +1169,7 @@ int tcp_send_synack(struct sock *sk)
__skb_unlink(skb, &sk->sk_write_queue);
__skb_queue_head(&sk->sk_write_queue, nskb);
tcp_free_skb(sk, skb);
tcp_charge_skb(sk, nskb);
sk_charge_skb(sk, nskb);
skb = nskb;
}
......@@ -1329,7 +1329,7 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
__skb_queue_tail(&sk->sk_write_queue, buff);
tcp_charge_skb(sk, buff);
sk_charge_skb(sk, buff);
tp->packets_out++;
tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
TCP_INC_STATS(TcpActiveOpens);
......
......@@ -1878,7 +1878,7 @@ static int tcp_v6_init_sock(struct sock *sk)
tp->af_specific = &ipv6_specific;
sk->sk_write_space = tcp_write_space;
sk->sk_write_space = sk_stream_write_space;
sk->sk_use_write_queue = 1;
sk->sk_sndbuf = sysctl_tcp_wmem[1];
......
......@@ -507,7 +507,7 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
return -1;
}
if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
UDP6_INC_STATS_BH(UdpInErrors);
kfree_skb(skb);
......
......@@ -125,7 +125,7 @@ svc_sock_wspace(struct svc_sock *svsk)
int wspace;
if (svsk->sk_sock->type == SOCK_STREAM)
wspace = tcp_wspace(svsk->sk_sk);
wspace = sk_stream_wspace(svsk->sk_sk);
else
wspace = sock_wspace(svsk->sk_sk);
......
......@@ -1086,8 +1086,8 @@ xprt_write_space(struct sock *sk)
/* Wait until we have enough socket memory */
if (xprt->stream) {
/* from net/ipv4/tcp.c:tcp_write_space */
if (tcp_wspace(sk) < tcp_min_write_space(sk))
/* from net/core/stream.c:sk_stream_write_space */
if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))
goto out;
} else {
/* from net/core/sock.c:sock_def_write_space */
......
......@@ -3343,13 +3343,13 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
struct nlmsghdr *nlh;
struct socket *sock = sk->sk_socket;
struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
if (skb->len < NLMSG_SPACE(0)) {
err = -EINVAL;
goto out;
}
nlh = (struct nlmsghdr *)skb->data;
err = selinux_nlmsg_lookup(isec->sclass, nlh->nlmsg_type, &perm);
if (err) {
/* Ignore */
......@@ -3366,15 +3366,15 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
/*
 * Netlink send hook.
 *
 * First fixes up the effective capability set stored in the skb's
 * netlink control block: CAP_NET_ADMIN is raised in it when
 * capable(CAP_NET_ADMIN) succeeds, otherwise the whole set is cleared.
 * Then, if the loaded policy version is at least
 * POLICYDB_VERSION_NLCLASS, the result of selinux_nlmsg_perm() is
 * returned; with an older policy the hook returns 0.
 */
static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
{
	if (capable(CAP_NET_ADMIN)) {
		cap_raise(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN);
	} else {
		NETLINK_CB(skb).eff_cap = 0;
	}

	/* Older policies have no per-message netlink classes to check. */
	if (policydb_loaded_version < POLICYDB_VERSION_NLCLASS)
		return 0;

	return selinux_nlmsg_perm(sk, skb);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment