Commit e811f324 authored by James Morris's avatar James Morris Committed by David S. Miller

[NETFILTER]: Fixup ip6_queue much like ip_queue was:

- Fix unicast pid wrap issue
- Fix potential module unload races for netfilter and netlink paths
- General code cleanup
- Queue session cannot be overridden by another client
- Client can set copy mode to none, which stops queueing
parent 8d4cb2d7
......@@ -15,7 +15,7 @@
* real coder of this.
* Few changes needed, mainly the hard_routing code and
* the netlink socket protocol (we're NETLINK_IP6_FW).
*
* 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
*/
#include <linux/module.h>
#include <linux/skbuff.h>
......@@ -26,18 +26,12 @@
#include <linux/netfilter.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/rtnetlink.h>
#include <linux/brlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
/* We're still using the following structs. No need to change them: */
/* ipq_packet_msg */
/* ipq_mode_msg */
/* ipq_verdict_msg */
/* ipq_peer_msg */
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
......@@ -47,184 +41,289 @@
#define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
typedef struct ip6q_rt_info {
struct ipq_rt_info {
struct in6_addr daddr;
struct in6_addr saddr;
} ip6q_rt_info_t;
typedef struct ip6q_queue_element {
struct list_head list; /* Links element into queue */
int verdict; /* Current verdict */
struct nf_info *info; /* Extra info from netfilter */
struct sk_buff *skb; /* Packet inside */
ip6q_rt_info_t rt_info; /* May need post-mangle routing */
} ip6q_queue_element_t;
typedef int (*ip6q_send_cb_t)(ip6q_queue_element_t *e);
typedef struct ip6q_peer {
pid_t pid; /* PID of userland peer */
unsigned char died; /* We think the peer died */
unsigned char copy_mode; /* Copy packet as well as metadata? */
size_t copy_range; /* Range past metadata to copy */
ip6q_send_cb_t send; /* Callback for sending data to peer */
} ip6q_peer_t;
typedef struct ip6q_queue {
int len; /* Current queue len */
int *maxlen; /* Maximum queue len, via sysctl */
unsigned char flushing; /* If queue is being flushed */
unsigned char terminate; /* If the queue is being terminated */
struct list_head list; /* Head of packet queue */
spinlock_t lock; /* Queue spinlock */
ip6q_peer_t peer; /* Userland peer */
} ip6q_queue_t;
/****************************************************************************
*
* Packet queue
*
****************************************************************************/
/* Dequeue a packet if matched by cmp, or the next available if cmp is NULL */
static ip6q_queue_element_t *
ip6q_dequeue(ip6q_queue_t *q,
int (*cmp)(ip6q_queue_element_t *, unsigned long),
unsigned long data)
{
struct list_head *i;
};
struct ipq_queue_entry {
struct list_head list;
struct nf_info *info;
struct sk_buff *skb;
struct ipq_rt_info rt_info;
};
spin_lock_bh(&q->lock);
for (i = q->list.prev; i != &q->list; i = i->prev) {
ip6q_queue_element_t *e = (ip6q_queue_element_t *)i;
typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
if (!cmp || cmp(e, data)) {
list_del(&e->list);
q->len--;
spin_unlock_bh(&q->lock);
return e;
static unsigned char copy_mode = IPQ_COPY_NONE;
static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
static rwlock_t queue_lock = RW_LOCK_UNLOCKED;
static int peer_pid;
static unsigned int copy_range;
static unsigned int queue_total;
static struct sock *ipqnl;
static LIST_HEAD(queue_list);
static DECLARE_MUTEX(ipqnl_sem);
/*
 * Hand a queued packet back to netfilter with the given verdict and
 * release the queue entry.  Every queued skb must eventually pass
 * through here (nf_reinject) exactly once.
 */
static void
ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
{
	nf_reinject(entry->skb, entry->info, verdict);
	kfree(entry);
}
/*
 * Add an entry to the front of the queue, failing with -ENOSPC when the
 * queue is full.
 * NOTE(review): __-prefixed helper — caller is expected to hold
 * queue_lock for writing (see ipq_enqueue_packet); confirm for new callers.
 */
static inline int
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
	if (queue_total >= queue_maxlen) {
		if (net_ratelimit())
			printk(KERN_WARNING "ip6_queue: full at %d entries, "
			       "dropping packet(s).\n", queue_total);
		return -ENOSPC;
	}
	list_add(&entry->list, &queue_list);
	queue_total++;
	return 0;
}
/*
* Find and return a queued entry matched by cmpfn, or return the last
* entry if cmpfn is NULL.
*/
static inline struct ipq_queue_entry *
__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
{
	struct list_head *p;

	/* Walk backwards so the oldest entry is found first when no
	 * predicate is given (entries are list_add'ed at the front). */
	list_for_each_prev(p, &queue_list) {
		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;

		if (!cmpfn || cmpfn(entry, data))
			return entry;
	}
	return NULL;
}
/* Flush all packets */
static void ip6q_flush(ip6q_queue_t *q)
static inline void
__ipq_dequeue_entry(struct ipq_queue_entry *entry)
{
ip6q_queue_element_t *e;
spin_lock_bh(&q->lock);
q->flushing = 1;
spin_unlock_bh(&q->lock);
while ((e = ip6q_dequeue(q, NULL, 0))) {
e->verdict = NF_DROP;
nf_reinject(e->skb, e->info, e->verdict);
kfree(e);
}
spin_lock_bh(&q->lock);
q->flushing = 0;
spin_unlock_bh(&q->lock);
list_del(&entry->list);
queue_total--;
}
static ip6q_queue_t *ip6q_create_queue(nf_queue_outfn_t outfn,
ip6q_send_cb_t send_cb,
int *errp, int *sysctl_qmax)
static inline struct ipq_queue_entry *
__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
{
int status;
ip6q_queue_t *q;
struct ipq_queue_entry *entry;
*errp = 0;
q = kmalloc(sizeof(ip6q_queue_t), GFP_KERNEL);
if (q == NULL) {
*errp = -ENOMEM;
entry = __ipq_find_entry(cmpfn, data);
if (entry == NULL)
return NULL;
__ipq_dequeue_entry(entry);
return entry;
}
/*
 * Reinject every queued packet with the given verdict, emptying the
 * queue.  Caller is expected to hold queue_lock for writing
 * (see ipq_flush / __ipq_reset).
 */
static inline void
__ipq_flush(int verdict)
{
	struct ipq_queue_entry *entry;

	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
		ipq_issue_verdict(entry, verdict);
}
/*
 * Set the copy mode requested by the peer.  IPQ_COPY_NONE disables
 * queueing entirely (see ipq_enqueue_packet); IPQ_COPY_PACKET also
 * copies up to 'range' bytes of payload to userspace.
 * Caller is expected to hold queue_lock for writing (see ipq_set_mode).
 */
static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
	int status = 0;

	switch (mode) {
	case IPQ_COPY_NONE:
	case IPQ_COPY_META:
		copy_mode = mode;
		copy_range = 0;
		break;

	case IPQ_COPY_PACKET:
		copy_mode = mode;
		copy_range = range;
		/* Clamp to 16 bits, matching the on-the-wire message field. */
		if (copy_range > 0xFFFF)
			copy_range = 0xFFFF;
		break;

	default:
		status = -EINVAL;
	}
	return status;
}
/*
 * Forget the userspace peer: clear its pid, stop copying/queueing and
 * drop everything still queued.  Used when the peer's netlink socket is
 * released.  Caller is expected to hold queue_lock for writing.
 */
static inline void
__ipq_reset(void)
{
	peer_pid = 0;
	__ipq_set_mode(IPQ_COPY_NONE, 0);
	__ipq_flush(NF_DROP);
}
/*
 * Locked wrapper around __ipq_find_dequeue_entry(): find and remove a
 * matching entry under queue_lock.
 */
static struct ipq_queue_entry *
ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
{
	struct ipq_queue_entry *entry;

	write_lock_bh(&queue_lock);
	entry = __ipq_find_dequeue_entry(cmpfn, data);
	write_unlock_bh(&queue_lock);
	return entry;
}
/*
 * Locked wrapper around __ipq_flush(): drop or accept all queued
 * packets under queue_lock.
 */
static void
ipq_flush(int verdict)
{
	write_lock_bh(&queue_lock);
	__ipq_flush(verdict);
	write_unlock_bh(&queue_lock);
}
static struct sk_buff *
ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
{
unsigned char *old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
read_lock_bh(&queue_lock);
switch (copy_mode) {
case IPQ_COPY_META:
case IPQ_COPY_NONE:
size = NLMSG_SPACE(sizeof(*pmsg));
data_len = 0;
break;
case IPQ_COPY_PACKET:
if (copy_range == 0 || copy_range > entry->skb->len)
data_len = entry->skb->len;
else
data_len = copy_range;
size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
break;
default:
*errp = -EINVAL;
read_unlock_bh(&queue_lock);
return NULL;
}
return q;
read_unlock_bh(&queue_lock);
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
goto nlmsg_failure;
old_tail= skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
pmsg->timestamp_sec = entry->skb->stamp.tv_sec;
pmsg->timestamp_usec = entry->skb->stamp.tv_usec;
pmsg->mark = entry->skb->nfmark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
if (entry->info->indev)
strcpy(pmsg->indev_name, entry->info->indev->name);
else
pmsg->indev_name[0] = '\0';
if (entry->info->outdev)
strcpy(pmsg->outdev_name, entry->info->outdev->name);
else
pmsg->outdev_name[0] = '\0';
if (entry->info->indev && entry->skb->dev) {
pmsg->hw_type = entry->skb->dev->type;
if (entry->skb->dev->hard_header_parse)
pmsg->hw_addrlen =
entry->skb->dev->hard_header_parse(entry->skb,
pmsg->hw_addr);
}
if (data_len)
memcpy(pmsg->payload, entry->skb->data, data_len);
nlh->nlmsg_len = skb->tail - old_tail;
return skb;
nlmsg_failure:
if (skb)
kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip6_queue: error creating packet message\n");
return NULL;
}
static int ip6q_enqueue(ip6q_queue_t *q,
struct sk_buff *skb, struct nf_info *info)
static int
ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
{
ip6q_queue_element_t *e;
int status;
int status = -EINVAL;
struct sk_buff *nskb;
struct ipq_queue_entry *entry;
e = kmalloc(sizeof(*e), GFP_ATOMIC);
if (e == NULL) {
printk(KERN_ERR "ip6_queue: OOM in enqueue\n");
if (copy_mode == IPQ_COPY_NONE)
return -EAGAIN;
entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL) {
printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
return -ENOMEM;
}
e->verdict = NF_DROP;
e->info = info;
e->skb = skb;
entry->info = info;
entry->skb = skb;
if (e->info->hook == NF_IP_LOCAL_OUT) {
if (entry->info->hook == NF_IP_LOCAL_OUT) {
struct ipv6hdr *iph = skb->nh.ipv6h;
e->rt_info.daddr = iph->daddr;
e->rt_info.saddr = iph->saddr;
entry->rt_info.daddr = iph->daddr;
entry->rt_info.saddr = iph->saddr;
}
spin_lock_bh(&q->lock);
if (q->len >= *q->maxlen) {
spin_unlock_bh(&q->lock);
if (net_ratelimit())
printk(KERN_WARNING "ip6_queue: full at %d entries, "
"dropping packet(s).\n", q->len);
goto free_drop;
}
if (q->flushing || q->peer.copy_mode == IPQ_COPY_NONE
|| q->peer.pid == 0 || q->peer.died || q->terminate) {
spin_unlock_bh(&q->lock);
goto free_drop;
}
status = q->peer.send(e);
if (status > 0) {
list_add(&e->list, &q->list);
q->len++;
spin_unlock_bh(&q->lock);
nskb = ipq_build_packet_message(entry, &status);
if (nskb == NULL)
goto err_out_free;
write_lock_bh(&queue_lock);
if (!peer_pid)
goto err_out_unlock;
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
if (status < 0)
goto err_out_unlock;
status = __ipq_enqueue_entry(entry);
if (status < 0)
goto err_out_unlock;
write_unlock_bh(&queue_lock);
return status;
}
spin_unlock_bh(&q->lock);
if (status == -ECONNREFUSED) {
printk(KERN_INFO "ip6_queue: peer %d died, "
"resetting state and flushing queue\n", q->peer.pid);
q->peer.died = 1;
q->peer.pid = 0;
q->peer.copy_mode = IPQ_COPY_NONE;
q->peer.copy_range = 0;
ip6q_flush(q);
}
free_drop:
kfree(e);
return -EBUSY;
}
static void ip6q_destroy_queue(ip6q_queue_t *q)
{
nf_unregister_queue_handler(PF_INET6);
spin_lock_bh(&q->lock);
q->terminate = 1;
spin_unlock_bh(&q->lock);
ip6q_flush(q);
kfree(q);
err_out_unlock:
write_unlock_bh(&queue_lock);
err_out_free:
kfree(entry);
return status;
}
/*
......@@ -236,7 +335,8 @@ static void ip6q_destroy_queue(ip6q_queue_t *q)
*
* If that one is modified, this one should be modified too.
*/
static int route6_me_harder(struct sk_buff *skb)
static int
route6_me_harder(struct sk_buff *skb)
{
struct ipv6hdr *iph = skb->nh.ipv6h;
struct dst_entry *dst;
......@@ -264,7 +364,9 @@ static int route6_me_harder(struct sk_buff *skb)
skb->dst = dst;
return 0;
}
static int ip6q_mangle_ipv6(ipq_verdict_msg_t *v, ip6q_queue_element_t *e)
static int
ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
{
int diff;
struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
......@@ -319,70 +421,67 @@ static int ip6q_mangle_ipv6(ipq_verdict_msg_t *v, ip6q_queue_element_t *e)
return 0;
}
/* Match a queued entry by packet id (the entry's kernel address,
 * as published in ipq_build_packet_message). */
static inline int
id_cmp(struct ipq_queue_entry *e, unsigned long id)
{
	return (id == (unsigned long )e);
}
static int ip6q_set_verdict(ip6q_queue_t *q,
ipq_verdict_msg_t *v, unsigned int len)
static int
ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{
ip6q_queue_element_t *e;
struct ipq_queue_entry *entry;
if (v->value > NF_MAX_VERDICT)
if (vmsg->value > NF_MAX_VERDICT)
return -EINVAL;
e = ip6q_dequeue(q, id_cmp, v->id);
if (e == NULL)
entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
if (entry == NULL)
return -ENOENT;
else {
e->verdict = v->value;
if (v->data_len && v->data_len == len)
if (ip6q_mangle_ipv6(v, e) < 0)
e->verdict = NF_DROP;
nf_reinject(e->skb, e->info, e->verdict);
kfree(e);
int verdict = vmsg->value;
if (vmsg->data_len && vmsg->data_len == len)
if (ipq_mangle_ipv6(vmsg, entry) < 0)
verdict = NF_DROP;
ipq_issue_verdict(entry, verdict);
return 0;
}
}
static int ip6q_receive_peer(ip6q_queue_t* q, ipq_peer_msg_t *m,
unsigned char type, unsigned int len)
static int
ipq_set_mode(unsigned char mode, unsigned int range)
{
int status;
write_lock_bh(&queue_lock);
status = __ipq_set_mode(mode, range);
write_unlock_bh(&queue_lock);
return status;
}
/*
 * Dispatch a validated message from the userspace peer: either a copy
 * mode change (IPQM_MODE) or a verdict for a queued packet
 * (IPQM_VERDICT).  'len' is the payload length after the netlink
 * header.  Returns 0 or a negative error.
 */
static int
ipq_receive_peer(struct ipq_peer_msg *pmsg,
                 unsigned char type, unsigned int len)
{
	int status = 0;

	if (len < sizeof(*pmsg))
		return -EINVAL;

	switch (type) {
	case IPQM_MODE:
		status = ipq_set_mode(pmsg->msg.mode.value,
		                      pmsg->msg.mode.range);
		break;

	case IPQM_VERDICT:
		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
			status = -EINVAL;
		else
			status = ipq_set_verdict(&pmsg->msg.verdict,
			                         len - sizeof(*pmsg));
		break;

	default:
		status = -EINVAL;
	}
	return status;
}
static inline int dev_cmp(ip6q_queue_element_t *e, unsigned long ifindex)
static int
dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
{
if (e->info->indev)
if (e->info->indev->ifindex == ifindex)
if (entry->info->indev)
if (entry->info->indev->ifindex == ifindex)
return 1;
if (e->info->outdev)
if (e->info->outdev->ifindex == ifindex)
if (entry->info->outdev)
if (entry->info->outdev->ifindex == ifindex)
return 1;
return 0;
}
/* Drop any queued packets associated with device ifindex */
static void ip6q_dev_drop(ip6q_queue_t *q, int ifindex)
static void
ipq_dev_drop(int ifindex)
{
ip6q_queue_element_t *e;
while ((e = ip6q_dequeue(q, dev_cmp, ifindex))) {
e->verdict = NF_DROP;
nf_reinject(e->skb, e->info, e->verdict);
kfree(e);
}
}
struct ipq_queue_entry *entry;
/****************************************************************************
*
* Netfilter interface
*
****************************************************************************/
/*
* Packets arrive here from netfilter for queuing to userspace.
* All of them must be fed back via nf_reinject() or Alexey will kill Rusty.
*/
static int netfilter6_receive(struct sk_buff *skb,
struct nf_info *info, void *data)
{
return ip6q_enqueue((ip6q_queue_t *)data, skb, info);
}
/****************************************************************************
*
* Netlink interface.
*
****************************************************************************/
static struct sock *nfnl = NULL;
/* This is not a static one, so we should not repeat its name */
ip6q_queue_t *nlq6 = NULL;
static struct sk_buff *netlink_build_message(ip6q_queue_element_t *e, int *errp)
{
unsigned char *old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
ipq_packet_msg_t *pm;
struct nlmsghdr *nlh;
switch (nlq6->peer.copy_mode) {
size_t copy_range;
case IPQ_COPY_META:
size = NLMSG_SPACE(sizeof(*pm));
data_len = 0;
break;
case IPQ_COPY_PACKET:
copy_range = nlq6->peer.copy_range;
if (copy_range == 0 || copy_range > e->skb->len)
data_len = e->skb->len;
else
data_len = copy_range;
size = NLMSG_SPACE(sizeof(*pm) + data_len);
break;
case IPQ_COPY_NONE:
default:
*errp = -EINVAL;
return NULL;
}
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
goto nlmsg_failure;
old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pm = NLMSG_DATA(nlh);
memset(pm, 0, sizeof(*pm));
pm->packet_id = (unsigned long )e;
pm->data_len = data_len;
pm->timestamp_sec = e->skb->stamp.tv_sec;
pm->timestamp_usec = e->skb->stamp.tv_usec;
pm->mark = e->skb->nfmark;
pm->hook = e->info->hook;
if (e->info->indev) strcpy(pm->indev_name, e->info->indev->name);
else pm->indev_name[0] = '\0';
if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name);
else pm->outdev_name[0] = '\0';
pm->hw_protocol = e->skb->protocol;
if (e->info->indev && e->skb->dev) {
pm->hw_type = e->skb->dev->type;
if (e->skb->dev->hard_header_parse)
pm->hw_addrlen =
e->skb->dev->hard_header_parse(e->skb,
pm->hw_addr);
}
if (data_len)
memcpy(pm->payload, e->skb->data, data_len);
nlh->nlmsg_len = skb->tail - old_tail;
NETLINK_CB(skb).dst_groups = 0;
return skb;
nlmsg_failure:
if (skb)
kfree_skb(skb);
*errp = 0;
printk(KERN_ERR "ip6_queue: error creating netlink message\n");
return NULL;
}
static int netlink_send_peer(ip6q_queue_element_t *e)
{
int status = 0;
struct sk_buff *skb;
skb = netlink_build_message(e, &status);
if (skb == NULL)
return status;
return netlink_unicast(nfnl, skb, nlq6->peer.pid, MSG_DONTWAIT);
while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
ipq_issue_verdict(entry, NF_DROP);
}
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
static __inline__ void netlink_receive_user_skb(struct sk_buff *skb)
static inline void
ipq_rcv_skb(struct sk_buff *skb)
{
int status, type;
int status, type, pid, flags, nlmsglen, skblen;
struct nlmsghdr *nlh;
if (skb->len < sizeof(struct nlmsghdr))
skblen = skb->len;
if (skblen < sizeof(*nlh))
return;
nlh = (struct nlmsghdr *)skb->data;
if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
|| skb->len < nlh->nlmsg_len)
nlmsglen = nlh->nlmsg_len;
if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
return;
if(nlh->nlmsg_pid <= 0
|| !(nlh->nlmsg_flags & NLM_F_REQUEST)
|| nlh->nlmsg_flags & NLM_F_MULTI)
pid = nlh->nlmsg_pid;
flags = nlh->nlmsg_flags;
if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
RCV_SKB_FAIL(-EINVAL);
if (nlh->nlmsg_flags & MSG_TRUNC)
if (flags & MSG_TRUNC)
RCV_SKB_FAIL(-ECOMM);
type = nlh->nlmsg_type;
if (type < NLMSG_NOOP || type >= IPQM_MAX)
RCV_SKB_FAIL(-EINVAL);
if (type <= IPQM_BASE)
return;
if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
if (nlq6->peer.pid && !nlq6->peer.died
&& (nlq6->peer.pid != nlh->nlmsg_pid)) {
printk(KERN_WARNING "ip6_queue: peer pid changed from %d to "
"%d, flushing queue\n", nlq6->peer.pid, nlh->nlmsg_pid);
ip6q_flush(nlq6);
write_lock_bh(&queue_lock);
if (peer_pid) {
if (peer_pid != pid) {
write_unlock_bh(&queue_lock);
RCV_SKB_FAIL(-EBUSY);
}
}
nlq6->peer.pid = nlh->nlmsg_pid;
nlq6->peer.died = 0;
status = ip6q_receive_peer(nlq6, NLMSG_DATA(nlh),
type, skb->len - NLMSG_LENGTH(0));
else
peer_pid = pid;
write_unlock_bh(&queue_lock);
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
skblen - NLMSG_LENGTH(0));
if (status < 0)
RCV_SKB_FAIL(status);
if (nlh->nlmsg_flags & NLM_F_ACK)
if (flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
return;
}
/* Note: we are only dealing with single part messages at the moment. */
static void netlink_receive_user_sk(struct sock *sk, int len)
static void
ipq_rcv_sk(struct sock *sk, int len)
{
do {
struct sk_buff *skb;
if (rtnl_shlock_nowait())
if (down_trylock(&ipqnl_sem))
return;
while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
netlink_receive_user_skb(skb);
ipq_rcv_skb(skb);
kfree_skb(skb);
}
up(&rtnl_sem);
} while (nfnl && nfnl->receive_queue.qlen);
}
/****************************************************************************
*
* System events
*
****************************************************************************/
up(&ipqnl_sem);
} while (ipqnl && ipqnl->receive_queue.qlen);
}
static int receive_event(struct notifier_block *this,
static int
ipq_rcv_dev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
/* Drop any packets associated with the downed device */
if (event == NETDEV_DOWN)
ip6q_dev_drop(nlq6, dev->ifindex);
ipq_dev_drop(dev->ifindex);
return NOTIFY_DONE;
}
struct notifier_block ip6q_dev_notifier = {
receive_event,
static struct notifier_block ipq_dev_notifier = {
ipq_rcv_dev_event,
NULL,
0
};
/****************************************************************************
*
* Sysctl - queue tuning.
*
****************************************************************************/
/*
 * Netlink notifier: when the peer's NETLINK_IP6_FW socket is released,
 * reset the queue state so a later client can attach cleanly (this is
 * what fixes the stale-pid problem after a peer exits).
 */
static int
ipq_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
	struct netlink_notify *n = ptr;

	if (event == NETLINK_URELEASE &&
	    n->protocol == NETLINK_IP6_FW && n->pid) {
		write_lock_bh(&queue_lock);
		if (n->pid == peer_pid)
			__ipq_reset();
		write_unlock_bh(&queue_lock);
	}
	return NOTIFY_DONE;
}
/* Registered in init_or_cleanup(); see ipq_rcv_nl_event. */
static struct notifier_block ipq_nl_notifier = {
	ipq_rcv_nl_event,
	NULL,
	0
};
/* NOTE(review): the sysctl below exposes sysctl_maxlen, but the
 * enqueue path checks queue_maxlen — verify these are meant to be
 * separate variables rather than one tunable. */
static int sysctl_maxlen = IPQ_QMAX_DEFAULT;
static struct ctl_table_header *ipq_sysctl_header;
static ctl_table ip6q_table[] = {
static ctl_table ipq_table[] = {
{ NET_IPQ_QMAX, NET_IPQ_QMAX_NAME, &sysctl_maxlen,
sizeof(sysctl_maxlen), 0644, NULL, proc_dointvec },
{ 0 }
};
static ctl_table ip6q_dir_table[] = {
{NET_IPV6, "ipv6", NULL, 0, 0555, ip6q_table, 0, 0, 0, 0, 0},
static ctl_table ipq_dir_table[] = {
{NET_IPV6, "ipv6", NULL, 0, 0555, ipq_table, 0, 0, 0, 0, 0},
{ 0 }
};
static ctl_table ip6q_root_table[] = {
{CTL_NET, "net", NULL, 0, 0555, ip6q_dir_table, 0, 0, 0, 0, 0},
static ctl_table ipq_root_table[] = {
{CTL_NET, "net", NULL, 0, 0555, ipq_dir_table, 0, 0, 0, 0, 0},
{ 0 }
};
/****************************************************************************
*
* Procfs - debugging info.
*
****************************************************************************/
/*
 * /proc/net/ip6_queue read handler: report peer and queue status.
 * NOTE(review): the tail below follows the standard 2.4 proc_net read
 * clamping pattern; the original lines were elided in this view —
 * confirm against ip_queue.c.
 */
static int
ipq_get_info(char *buffer, char **start, off_t offset, int length)
{
	int len;

	read_lock_bh(&queue_lock);

	len = sprintf(buffer,
	              "Peer PID : %d\n"
	              "Copy mode : %hu\n"
	              "Copy range : %u\n"
	              "Queue length : %u\n"
	              "Queue max. length : %u\n",
	              peer_pid,
	              copy_mode,
	              copy_range,
	              queue_total,
	              queue_maxlen);

	read_unlock_bh(&queue_lock);

	*start = buffer + offset;
	len -= offset;
	if (len > length)
		len = length;
	else if (len < 0)
		len = 0;
	return len;
}
/****************************************************************************
*
* Module stuff.
*
****************************************************************************/
static int __init init(void)
static int
init_or_cleanup(int init)
{
int status = 0;
int status = -ENOMEM;
struct proc_dir_entry *proc;
/* We must create the NETLINK_IP6_FW protocol service */
nfnl = netlink_kernel_create(NETLINK_IP6_FW, netlink_receive_user_sk);
if (nfnl == NULL) {
printk(KERN_ERR "ip6_queue: initialisation failed: unable to "
"create kernel netlink socket\n");
return -ENOMEM;
}
nlq6 = ip6q_create_queue(netfilter6_receive,
netlink_send_peer, &status, &sysctl_maxlen);
if (nlq6 == NULL) {
printk(KERN_ERR "ip6_queue: initialisation failed: unable to "
"create queue\n");
sock_release(nfnl->socket);
return status;
if (!init)
goto cleanup;
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
if (ipqnl == NULL) {
printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
/* The file will be /proc/net/ip6_queue */
proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ip6q_get_info);
if (proc) proc->owner = THIS_MODULE;
proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
if (proc)
proc->owner = THIS_MODULE;
else {
ip6q_destroy_queue(nlq6);
sock_release(nfnl->socket);
return -ENOMEM;
printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
goto cleanup_ipqnl;
}
register_netdevice_notifier(&ip6q_dev_notifier);
ip6q_sysctl_header = register_sysctl_table(ip6q_root_table, 0);
register_netdevice_notifier(&ipq_dev_notifier);
ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL);
if (status < 0) {
printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
goto cleanup_sysctl;
}
return status;
cleanup:
nf_unregister_queue_handler(PF_INET6);
br_write_lock_bh(BR_NETPROTO_LOCK);
br_write_unlock_bh(BR_NETPROTO_LOCK);
ipq_flush(NF_DROP);
cleanup_sysctl:
unregister_sysctl_table(ipq_sysctl_header);
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(IPQ_PROC_FS_NAME);
cleanup_ipqnl:
sock_release(ipqnl->socket);
down(&ipqnl_sem);
up(&ipqnl_sem);
cleanup_netlink_notifier:
netlink_unregister_notifier(&ipq_nl_notifier);
return status;
}
/* Module entry point: run the full initialisation path. */
static int __init init(void)
{
	return init_or_cleanup(1);
}
/* Module exit point: run the teardown half of init_or_cleanup(). */
static void __exit fini(void)
{
	init_or_cleanup(0);
}
MODULE_DESCRIPTION("IPv6 packet queue handler");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment