Commit dacc62db authored by David S. Miller's avatar David S. Miller
parents 47abf28d c051a0a2
......@@ -21,11 +21,103 @@
#include <linux/timer.h>
#include <net/checksum.h>
#include <linux/netfilter.h> /* for union nf_inet_addr */
#include <linux/ipv6.h> /* for struct ipv6hdr */
#include <net/ipv6.h> /* for ipv6_addr_copy */
struct ip_vs_iphdr {
int len;
__u8 protocol;
union nf_inet_addr saddr;
union nf_inet_addr daddr;
};
static inline void
ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
const struct ipv6hdr *iph = nh;
iphdr->len = sizeof(struct ipv6hdr);
iphdr->protocol = iph->nexthdr;
ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr);
ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr);
} else
#endif
{
const struct iphdr *iph = nh;
iphdr->len = iph->ihl * 4;
iphdr->protocol = iph->protocol;
iphdr->saddr.ip = iph->saddr;
iphdr->daddr.ip = iph->daddr;
}
}
static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
const union nf_inet_addr *src)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ipv6_addr_copy(&dst->in6, &src->in6);
else
#endif
dst->ip = src->ip;
}
static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
const union nf_inet_addr *b)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
return ipv6_addr_equal(&a->in6, &b->in6);
#endif
return a->ip == b->ip;
}
#ifdef CONFIG_IP_VS_DEBUG
#include <linux/net.h>
extern int ip_vs_get_debug_level(void);
static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
const union nf_inet_addr *addr,
int *idx)
{
int len;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]",
NIP6(addr->in6)) + 1;
else
#endif
len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT,
NIPQUAD(addr->ip)) + 1;
*idx += len;
BUG_ON(*idx > buf_len + 1);
return &buf[*idx - len];
}
#define IP_VS_DBG_BUF(level, msg...) \
do { \
char ip_vs_dbg_buf[160]; \
int ip_vs_dbg_idx = 0; \
if (level <= ip_vs_get_debug_level()) \
printk(KERN_DEBUG "IPVS: " msg); \
} while (0)
#define IP_VS_ERR_BUF(msg...) \
do { \
char ip_vs_dbg_buf[160]; \
int ip_vs_dbg_idx = 0; \
printk(KERN_ERR "IPVS: " msg); \
} while (0)
/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */
#define IP_VS_DBG_ADDR(af, addr) \
ip_vs_dbg_addr(af, ip_vs_dbg_buf, \
sizeof(ip_vs_dbg_buf), addr, \
&ip_vs_dbg_idx)
#define IP_VS_DBG(level, msg...) \
do { \
if (level <= ip_vs_get_debug_level()) \
......@@ -48,6 +140,8 @@ extern int ip_vs_get_debug_level(void);
pp->debug_packet(pp, skb, ofs, msg); \
} while (0)
#else /* NO DEBUGGING at ALL */
#define IP_VS_DBG_BUF(level, msg...) do {} while (0)
#define IP_VS_ERR_BUF(msg...) do {} while (0)
#define IP_VS_DBG(level, msg...) do {} while (0)
#define IP_VS_DBG_RL(msg...) do {} while (0)
#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0)
......@@ -160,27 +254,10 @@ struct ip_vs_estimator {
struct ip_vs_stats
{
__u32 conns; /* connections scheduled */
__u32 inpkts; /* incoming packets */
__u32 outpkts; /* outgoing packets */
__u64 inbytes; /* incoming bytes */
__u64 outbytes; /* outgoing bytes */
__u32 cps; /* current connection rate */
__u32 inpps; /* current in packet rate */
__u32 outpps; /* current out packet rate */
__u32 inbps; /* current in byte rate */
__u32 outbps; /* current out byte rate */
/*
* Don't add anything before the lock, because we use memcpy() to copy
* the members before the lock to struct ip_vs_stats_user in
* ip_vs_ctl.c.
*/
struct ip_vs_stats_user ustats; /* statistics */
struct ip_vs_estimator est; /* estimator */
spinlock_t lock; /* spin lock */
struct ip_vs_estimator est; /* estimator */
};
struct dst_entry;
......@@ -202,21 +279,23 @@ struct ip_vs_protocol {
void (*exit)(struct ip_vs_protocol *pp);
int (*conn_schedule)(struct sk_buff *skb,
int (*conn_schedule)(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn *
(*conn_in_get)(const struct sk_buff *skb,
(*conn_in_get)(int af,
const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct iphdr *iph,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
struct ip_vs_conn *
(*conn_out_get)(const struct sk_buff *skb,
(*conn_out_get)(int af,
const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct iphdr *iph,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
......@@ -226,7 +305,8 @@ struct ip_vs_protocol {
int (*dnat_handler)(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp);
int (*csum_check)(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp);
const char *(*state_name)(int state);
......@@ -259,9 +339,10 @@ struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */
/* Protocol, addresses and port numbers */
__be32 caddr; /* client address */
__be32 vaddr; /* virtual address */
__be32 daddr; /* destination address */
u16 af; /* address family */
union nf_inet_addr caddr; /* client address */
union nf_inet_addr vaddr; /* virtual address */
union nf_inet_addr daddr; /* destination address */
__be16 cport;
__be16 vport;
__be16 dport;
......@@ -304,6 +385,45 @@ struct ip_vs_conn {
};
/*
* Extended internal versions of struct ip_vs_service_user and
* ip_vs_dest_user for IPv6 support.
*
* We need these to conveniently pass around service and destination
* options, but unfortunately, we also need to keep the old definitions to
* maintain userspace backwards compatibility for the setsockopt interface.
*/
struct ip_vs_service_user_kern {
/* virtual service addresses */
u16 af;
u16 protocol;
union nf_inet_addr addr; /* virtual ip address */
u16 port;
u32 fwmark; /* firwall mark of service */
/* virtual service options */
char *sched_name;
unsigned flags; /* virtual service flags */
unsigned timeout; /* persistent timeout in sec */
u32 netmask; /* persistent netmask */
};
struct ip_vs_dest_user_kern {
/* destination server address */
union nf_inet_addr addr;
u16 port;
/* real server options */
unsigned conn_flags; /* connection flags */
int weight; /* destination weight */
/* thresholds for active connections */
u32 u_threshold; /* upper threshold */
u32 l_threshold; /* lower threshold */
};
/*
* The information about the virtual service offered to the net
* and the forwarding entries
......@@ -314,8 +434,9 @@ struct ip_vs_service {
atomic_t refcnt; /* reference counter */
atomic_t usecnt; /* use counter */
u16 af; /* address family */
__u16 protocol; /* which protocol (TCP/UDP) */
__be32 addr; /* IP address for virtual service */
union nf_inet_addr addr; /* IP address for virtual service */
__be16 port; /* port number for the service */
__u32 fwmark; /* firewall mark of the service */
unsigned flags; /* service status flags */
......@@ -342,7 +463,8 @@ struct ip_vs_dest {
struct list_head n_list; /* for the dests in the service */
struct list_head d_list; /* for table with all the dests */
__be32 addr; /* IP address of the server */
u16 af; /* address family */
union nf_inet_addr addr; /* IP address of the server */
__be16 port; /* port number of the server */
volatile unsigned flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */
......@@ -366,7 +488,7 @@ struct ip_vs_dest {
/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
__u16 protocol; /* which protocol (TCP/UDP) */
__be32 vaddr; /* virtual IP address */
union nf_inet_addr vaddr; /* virtual IP address */
__be16 vport; /* virtual port number */
__u32 vfwmark; /* firewall mark of service */
};
......@@ -380,6 +502,9 @@ struct ip_vs_scheduler {
char *name; /* scheduler name */
atomic_t refcnt; /* reference counter */
struct module *module; /* THIS_MODULE/NULL */
#ifdef CONFIG_IP_VS_IPV6
int supports_ipv6; /* scheduler has IPv6 support */
#endif
/* scheduler initializing service */
int (*init_service)(struct ip_vs_service *svc);
......@@ -479,16 +604,8 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows);
#ifndef CONFIG_IP_VS_TAB_BITS
#define CONFIG_IP_VS_TAB_BITS 12
#endif
/* make sure that IP_VS_CONN_TAB_BITS is located in [8, 20] */
#if CONFIG_IP_VS_TAB_BITS < 8
#define IP_VS_CONN_TAB_BITS 8
#endif
#if CONFIG_IP_VS_TAB_BITS > 20
#define IP_VS_CONN_TAB_BITS 20
#endif
#if 8 <= CONFIG_IP_VS_TAB_BITS && CONFIG_IP_VS_TAB_BITS <= 20
#define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS
#endif
#define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS)
#define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1)
......@@ -500,11 +617,16 @@ enum {
};
extern struct ip_vs_conn *ip_vs_conn_in_get
(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port);
extern struct ip_vs_conn *ip_vs_ct_in_get
(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port);
extern struct ip_vs_conn *ip_vs_conn_out_get
(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port);
/* put back the conn without restarting its timer */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
......@@ -515,8 +637,9 @@ extern void ip_vs_conn_put(struct ip_vs_conn *cp);
extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
extern struct ip_vs_conn *
ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
__be32 daddr, __be16 dport, unsigned flags,
ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
const union nf_inet_addr *vaddr, __be16 vport,
const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
struct ip_vs_dest *dest);
extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
......@@ -532,24 +655,32 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp)
{
struct ip_vs_conn *ctl_cp = cp->control;
if (!ctl_cp) {
IP_VS_ERR("request control DEL for uncontrolled: "
"%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
NIPQUAD(cp->caddr),ntohs(cp->cport),
NIPQUAD(cp->vaddr),ntohs(cp->vport));
IP_VS_ERR_BUF("request control DEL for uncontrolled: "
"%s:%d to %s:%d\n",
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
ntohs(cp->vport));
return;
}
IP_VS_DBG(7, "DELeting control for: "
"cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
NIPQUAD(cp->caddr),ntohs(cp->cport),
NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
IP_VS_DBG_BUF(7, "DELeting control for: "
"cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
ntohs(ctl_cp->cport));
cp->control = NULL;
if (atomic_read(&ctl_cp->n_control) == 0) {
IP_VS_ERR("BUG control DEL with n=0 : "
"%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
NIPQUAD(cp->caddr),ntohs(cp->cport),
NIPQUAD(cp->vaddr),ntohs(cp->vport));
IP_VS_ERR_BUF("BUG control DEL with n=0 : "
"%s:%d to %s:%d\n",
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
ntohs(cp->vport));
return;
}
atomic_dec(&ctl_cp->n_control);
......@@ -559,17 +690,22 @@ static inline void
ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
{
if (cp->control) {
IP_VS_ERR("request control ADD for already controlled: "
"%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
NIPQUAD(cp->caddr),ntohs(cp->cport),
NIPQUAD(cp->vaddr),ntohs(cp->vport));
IP_VS_ERR_BUF("request control ADD for already controlled: "
"%s:%d to %s:%d\n",
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
ntohs(cp->vport));
ip_vs_control_del(cp);
}
IP_VS_DBG(7, "ADDing control for: "
"cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
NIPQUAD(cp->caddr),ntohs(cp->cport),
NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
IP_VS_DBG_BUF(7, "ADDing control for: "
"cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
ntohs(ctl_cp->cport));
cp->control = ctl_cp;
atomic_inc(&ctl_cp->n_control);
......@@ -647,7 +783,8 @@ extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern struct ip_vs_service *
ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
{
......@@ -655,14 +792,16 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
}
extern struct ip_vs_dest *
ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport);
ip_vs_lookup_real_service(int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);
extern int ip_vs_use_count_inc(void);
extern void ip_vs_use_count_dec(void);
extern int ip_vs_control_init(void);
extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
ip_vs_find_dest(__be32 daddr, __be16 dport,
__be32 vaddr, __be16 vport, __u16 protocol);
ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
......@@ -706,6 +845,19 @@ extern int ip_vs_icmp_xmit
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
#ifdef CONFIG_IP_VS_IPV6
extern int ip_vs_bypass_xmit_v6
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern int ip_vs_nat_xmit_v6
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern int ip_vs_tunnel_xmit_v6
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern int ip_vs_dr_xmit_v6
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern int ip_vs_icmp_xmit_v6
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp,
int offset);
#endif
/*
* This is a simple mechanism to ignore packets when
......@@ -750,7 +902,12 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
}
extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int dir);
struct ip_vs_conn *cp, int dir);
#ifdef CONFIG_IP_VS_IPV6
extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int dir);
#endif
extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
......@@ -761,6 +918,17 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
return csum_partial((char *) diff, sizeof(diff), oldsum);
}
#ifdef CONFIG_IP_VS_IPV6
static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
__wsum oldsum)
{
__be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
new[3], new[2], new[1], new[0] };
return csum_partial((char *) diff, sizeof(diff), oldsum);
}
#endif
static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
{
__be16 diff[2] = { ~old, new };
......
......@@ -24,6 +24,14 @@ menuconfig IP_VS
if IP_VS
config IP_VS_IPV6
bool "IPv6 support for IPVS (DANGEROUS)"
depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
---help---
Add IPv6 support to IPVS. This is incomplete and might be dangerous.
Say N if unsure.
config IP_VS_DEBUG
bool "IP virtual server debugging"
---help---
......@@ -33,7 +41,8 @@ config IP_VS_DEBUG
config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
default "12"
range 8 20
default 12
---help---
The IPVS connection hash table uses the chaining scheme to handle
hash collisions. Using a big IPVS connection hash table will greatly
......
......@@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key)
/*
* Returns hash value for IPVS connection entry
*/
static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port)
static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
const union nf_inet_addr *addr,
__be16 port)
{
return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd)
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
(__force u32)port, proto, ip_vs_conn_rnd)
& IP_VS_CONN_TAB_MASK;
#endif
return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
ip_vs_conn_rnd)
& IP_VS_CONN_TAB_MASK;
}
......@@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
int ret;
/* Hash by protocol, client address and port */
hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
ct_write_lock(hash);
......@@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
int ret;
/* unhash it and decrease its reference counter */
hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
ct_write_lock(hash);
......@@ -187,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
* d_addr, d_port: pkt dest address (load balancer)
*/
static inline struct ip_vs_conn *__ip_vs_conn_in_get
(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp;
hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (s_addr==cp->caddr && s_port==cp->cport &&
d_port==cp->vport && d_addr==cp->vaddr &&
if (cp->af == af &&
ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
s_port == cp->cport && d_port == cp->vport &&
((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
protocol==cp->protocol) {
protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
ct_read_unlock(hash);
......@@ -214,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
}
struct ip_vs_conn *ip_vs_conn_in_get
(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port)
{
struct ip_vs_conn *cp;
cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
d_port);
IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
ip_vs_proto_name(protocol),
NIPQUAD(s_addr), ntohs(s_port),
NIPQUAD(d_addr), ntohs(d_port),
cp?"hit":"not hit");
IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
ip_vs_proto_name(protocol),
IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
cp ? "hit" : "not hit");
return cp;
}
/* Get reference to connection template */
struct ip_vs_conn *ip_vs_ct_in_get
(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp;
hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (s_addr==cp->caddr && s_port==cp->cport &&
d_port==cp->vport && d_addr==cp->vaddr &&
if (cp->af == af &&
ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
s_port == cp->cport && d_port == cp->vport &&
cp->flags & IP_VS_CONN_F_TEMPLATE &&
protocol==cp->protocol) {
protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
goto out;
......@@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get
out:
ct_read_unlock(hash);
IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
ip_vs_proto_name(protocol),
NIPQUAD(s_addr), ntohs(s_port),
NIPQUAD(d_addr), ntohs(d_port),
cp?"hit":"not hit");
IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
ip_vs_proto_name(protocol),
IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
cp ? "hit" : "not hit");
return cp;
}
......@@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get
* d_addr, d_port: pkt dest address (foreign host)
*/
struct ip_vs_conn *ip_vs_conn_out_get
(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp, *ret=NULL;
......@@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get
/*
* Check for "full" addressed entries
*/
hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (d_addr == cp->caddr && d_port == cp->cport &&
s_port == cp->dport && s_addr == cp->daddr &&
if (cp->af == af &&
ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
d_port == cp->cport && s_port == cp->dport &&
protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
......@@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get
ct_read_unlock(hash);
IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
ip_vs_proto_name(protocol),
NIPQUAD(s_addr), ntohs(s_port),
NIPQUAD(d_addr), ntohs(d_port),
ret?"hit":"not hit");
IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
ip_vs_proto_name(protocol),
IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
ret ? "hit" : "not hit");
return ret;
}
......@@ -369,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
}
}
#ifdef CONFIG_IP_VS_IPV6
static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
{
switch (IP_VS_FWD_METHOD(cp)) {
case IP_VS_CONN_F_MASQ:
cp->packet_xmit = ip_vs_nat_xmit_v6;
break;
case IP_VS_CONN_F_TUNNEL:
cp->packet_xmit = ip_vs_tunnel_xmit_v6;
break;
case IP_VS_CONN_F_DROUTE:
cp->packet_xmit = ip_vs_dr_xmit_v6;
break;
case IP_VS_CONN_F_LOCALNODE:
cp->packet_xmit = ip_vs_null_xmit;
break;
case IP_VS_CONN_F_BYPASS:
cp->packet_xmit = ip_vs_bypass_xmit_v6;
break;
}
}
#endif
static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
{
......@@ -402,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
cp->flags |= atomic_read(&dest->conn_flags);
cp->dest = dest;
IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
"d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
"dest->refcnt:%d\n",
ip_vs_proto_name(cp->protocol),
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
NIPQUAD(cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
"d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
"dest->refcnt:%d\n",
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
......@@ -444,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
dest = ip_vs_find_dest(cp->daddr, cp->dport,
cp->vaddr, cp->vport, cp->protocol);
dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
&cp->vaddr, cp->vport,
cp->protocol);
ip_vs_bind_dest(cp, dest);
return dest;
} else
......@@ -464,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
if (!dest)
return;
IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
"d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
"dest->refcnt:%d\n",
ip_vs_proto_name(cp->protocol),
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
NIPQUAD(cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
"d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
"dest->refcnt:%d\n",
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
......@@ -526,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
(sysctl_ip_vs_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
IP_VS_DBG(9, "check_template: dest not available for "
"protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
"-> d:%u.%u.%u.%u:%d\n",
ip_vs_proto_name(ct->protocol),
NIPQUAD(ct->caddr), ntohs(ct->cport),
NIPQUAD(ct->vaddr), ntohs(ct->vport),
NIPQUAD(ct->daddr), ntohs(ct->dport));
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
"-> d:%s:%d\n",
ip_vs_proto_name(ct->protocol),
IP_VS_DBG_ADDR(ct->af, &ct->caddr),
ntohs(ct->cport),
IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
ntohs(ct->vport),
IP_VS_DBG_ADDR(ct->af, &ct->daddr),
ntohs(ct->dport));
/*
* Invalidate the connection template
......@@ -625,8 +676,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
__be32 daddr, __be16 dport, unsigned flags,
ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
const union nf_inet_addr *vaddr, __be16 vport,
const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
struct ip_vs_dest *dest)
{
struct ip_vs_conn *cp;
......@@ -640,12 +692,13 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
INIT_LIST_HEAD(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
cp->af = af;
cp->protocol = proto;
cp->caddr = caddr;
ip_vs_addr_copy(af, &cp->caddr, caddr);
cp->cport = cport;
cp->vaddr = vaddr;
ip_vs_addr_copy(af, &cp->vaddr, vaddr);
cp->vport = vport;
cp->daddr = daddr;
ip_vs_addr_copy(af, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
spin_lock_init(&cp->lock);
......@@ -672,7 +725,12 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
cp->timeout = 3*HZ;
/* Bind its packet transmitter */
ip_vs_bind_xmit(cp);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_bind_xmit_v6(cp);
else
#endif
ip_vs_bind_xmit(cp);
if (unlikely(pp && atomic_read(&pp->appcnt)))
ip_vs_bind_app(cp, pp);
......@@ -760,12 +818,26 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
else {
const struct ip_vs_conn *cp = v;
seq_printf(seq,
"%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n",
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq,
"%-3s " NIP6_FMT " %04X " NIP6_FMT
" %04X " NIP6_FMT " %04X %-11s %7lu\n",
ip_vs_proto_name(cp->protocol),
NIP6(cp->caddr.in6), ntohs(cp->cport),
NIP6(cp->vaddr.in6), ntohs(cp->vport),
NIP6(cp->daddr.in6), ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
" %08X %04X %-11s %7lu\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr), ntohs(cp->cport),
ntohl(cp->vaddr), ntohs(cp->vport),
ntohl(cp->daddr), ntohs(cp->dport),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
ntohl(cp->daddr.ip), ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ);
}
......@@ -809,12 +881,27 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
else {
const struct ip_vs_conn *cp = v;
seq_printf(seq,
"%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq,
"%-3s " NIP6_FMT " %04X " NIP6_FMT
" %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
NIP6(cp->caddr.in6), ntohs(cp->cport),
NIP6(cp->vaddr.in6), ntohs(cp->vport),
NIP6(cp->daddr.in6), ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
"%08X %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr), ntohs(cp->cport),
ntohl(cp->vaddr), ntohs(cp->vport),
ntohl(cp->daddr), ntohs(cp->dport),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
ntohl(cp->daddr.ip), ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
......
......@@ -39,6 +39,11 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <linux/netfilter_ipv6.h>
#endif
#include <net/ip_vs.h>
......@@ -60,6 +65,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
/* ID used in ICMP lookups */
#define icmp_id(icmph) (((icmph)->un).echo.id)
#define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier)
const char *ip_vs_proto_name(unsigned proto)
{
......@@ -74,6 +80,10 @@ const char *ip_vs_proto_name(unsigned proto)
return "TCP";
case IPPROTO_ICMP:
return "ICMP";
#ifdef CONFIG_IP_VS_IPV6
case IPPROTO_ICMPV6:
return "ICMPv6";
#endif
default:
sprintf(buf, "IP_%d", proto);
return buf;
......@@ -92,18 +102,18 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_dest *dest = cp->dest;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
spin_lock(&dest->stats.lock);
dest->stats.inpkts++;
dest->stats.inbytes += skb->len;
dest->stats.ustats.inpkts++;
dest->stats.ustats.inbytes += skb->len;
spin_unlock(&dest->stats.lock);
spin_lock(&dest->svc->stats.lock);
dest->svc->stats.inpkts++;
dest->svc->stats.inbytes += skb->len;
dest->svc->stats.ustats.inpkts++;
dest->svc->stats.ustats.inbytes += skb->len;
spin_unlock(&dest->svc->stats.lock);
spin_lock(&ip_vs_stats.lock);
ip_vs_stats.inpkts++;
ip_vs_stats.inbytes += skb->len;
ip_vs_stats.ustats.inpkts++;
ip_vs_stats.ustats.inbytes += skb->len;
spin_unlock(&ip_vs_stats.lock);
}
}
......@@ -115,18 +125,18 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_dest *dest = cp->dest;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
spin_lock(&dest->stats.lock);
dest->stats.outpkts++;
dest->stats.outbytes += skb->len;
dest->stats.ustats.outpkts++;
dest->stats.ustats.outbytes += skb->len;
spin_unlock(&dest->stats.lock);
spin_lock(&dest->svc->stats.lock);
dest->svc->stats.outpkts++;
dest->svc->stats.outbytes += skb->len;
dest->svc->stats.ustats.outpkts++;
dest->svc->stats.ustats.outbytes += skb->len;
spin_unlock(&dest->svc->stats.lock);
spin_lock(&ip_vs_stats.lock);
ip_vs_stats.outpkts++;
ip_vs_stats.outbytes += skb->len;
ip_vs_stats.ustats.outpkts++;
ip_vs_stats.ustats.outbytes += skb->len;
spin_unlock(&ip_vs_stats.lock);
}
}
......@@ -136,15 +146,15 @@ static inline void
ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
spin_lock(&cp->dest->stats.lock);
cp->dest->stats.conns++;
cp->dest->stats.ustats.conns++;
spin_unlock(&cp->dest->stats.lock);
spin_lock(&svc->stats.lock);
svc->stats.conns++;
svc->stats.ustats.conns++;
spin_unlock(&svc->stats.lock);
spin_lock(&ip_vs_stats.lock);
ip_vs_stats.conns++;
ip_vs_stats.ustats.conns++;
spin_unlock(&ip_vs_stats.lock);
}
......@@ -173,20 +183,28 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
__be16 ports[2])
{
struct ip_vs_conn *cp = NULL;
struct iphdr *iph = ip_hdr(skb);
struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
struct ip_vs_conn *ct;
__be16 dport; /* destination port to forward */
__be32 snet; /* source network of the client, after masking */
__be16 dport; /* destination port to forward */
union nf_inet_addr snet; /* source network of the client,
after masking */
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
/* Mask saddr with the netmask to adjust template granularity */
snet = iph->saddr & svc->netmask;
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
else
#endif
snet.ip = iph.saddr.ip & svc->netmask;
IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u "
"mnet %u.%u.%u.%u\n",
NIPQUAD(iph->saddr), ntohs(ports[0]),
NIPQUAD(iph->daddr), ntohs(ports[1]),
NIPQUAD(snet));
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
* As far as we know, FTP is a very complicated network protocol, and
......@@ -204,11 +222,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
if (ports[1] == svc->port) {
/* Check if a template already exists */
if (svc->port != FTPPORT)
ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, ports[1]);
ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
&iph.daddr, ports[1]);
else
ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, 0);
ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
&iph.daddr, 0);
if (!ct || !ip_vs_check_template(ct)) {
/*
......@@ -228,18 +246,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* for ftp service.
*/
if (svc->port != FTPPORT)
ct = ip_vs_conn_new(iph->protocol,
snet, 0,
iph->daddr,
ct = ip_vs_conn_new(svc->af, iph.protocol,
&snet, 0,
&iph.daddr,
ports[1],
dest->addr, dest->port,
&dest->addr, dest->port,
IP_VS_CONN_F_TEMPLATE,
dest);
else
ct = ip_vs_conn_new(iph->protocol,
snet, 0,
iph->daddr, 0,
dest->addr, 0,
ct = ip_vs_conn_new(svc->af, iph.protocol,
&snet, 0,
&iph.daddr, 0,
&dest->addr, 0,
IP_VS_CONN_F_TEMPLATE,
dest);
if (ct == NULL)
......@@ -258,12 +276,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
* port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
*/
if (svc->fwmark)
ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
htonl(svc->fwmark), 0);
else
ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
iph->daddr, 0);
if (svc->fwmark) {
union nf_inet_addr fwmark = {
.all = { 0, 0, 0, htonl(svc->fwmark) }
};
ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
&fwmark, 0);
} else
ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
&iph.daddr, 0);
if (!ct || !ip_vs_check_template(ct)) {
/*
......@@ -282,18 +304,22 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a template according to the service
*/
if (svc->fwmark)
ct = ip_vs_conn_new(IPPROTO_IP,
snet, 0,
htonl(svc->fwmark), 0,
dest->addr, 0,
if (svc->fwmark) {
union nf_inet_addr fwmark = {
.all = { 0, 0, 0, htonl(svc->fwmark) }
};
ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
&snet, 0,
&fwmark, 0,
&dest->addr, 0,
IP_VS_CONN_F_TEMPLATE,
dest);
else
ct = ip_vs_conn_new(iph->protocol,
snet, 0,
iph->daddr, 0,
dest->addr, 0,
} else
ct = ip_vs_conn_new(svc->af, iph.protocol,
&snet, 0,
&iph.daddr, 0,
&dest->addr, 0,
IP_VS_CONN_F_TEMPLATE,
dest);
if (ct == NULL)
......@@ -310,10 +336,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
cp = ip_vs_conn_new(iph->protocol,
iph->saddr, ports[0],
iph->daddr, ports[1],
dest->addr, dport,
cp = ip_vs_conn_new(svc->af, iph.protocol,
&iph.saddr, ports[0],
&iph.daddr, ports[1],
&dest->addr, dport,
0,
dest);
if (cp == NULL) {
......@@ -342,12 +368,12 @@ struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_conn *cp = NULL;
struct iphdr *iph = ip_hdr(skb);
struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, iph->ihl*4,
sizeof(_ports), _ports);
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
......@@ -377,22 +403,22 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
/*
* Create a connection entry.
*/
cp = ip_vs_conn_new(iph->protocol,
iph->saddr, pptr[0],
iph->daddr, pptr[1],
dest->addr, dest->port?dest->port:pptr[1],
cp = ip_vs_conn_new(svc->af, iph.protocol,
&iph.saddr, pptr[0],
&iph.daddr, pptr[1],
&dest->addr, dest->port ? dest->port : pptr[1],
0,
dest);
if (cp == NULL)
return NULL;
IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
"d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
NIPQUAD(cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
"d:%s:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
return cp;
......@@ -408,20 +434,27 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
__be16 _ports[2], *pptr;
struct iphdr *iph = ip_hdr(skb);
struct ip_vs_iphdr iph;
int unicast;
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph->ihl*4,
sizeof(_ports), _ports);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
if (pptr == NULL) {
ip_vs_service_put(svc);
return NF_DROP;
}
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
else
#endif
unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is RTN_UNICAST (and not local), then create
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
if (sysctl_ip_vs_cache_bypass && svc->fwmark
&& (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
......@@ -429,9 +462,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
/* create a new connection entry */
IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n");
cp = ip_vs_conn_new(iph->protocol,
iph->saddr, pptr[0],
iph->daddr, pptr[1],
cp = ip_vs_conn_new(svc->af, iph.protocol,
&iph.saddr, pptr[0],
&iph.daddr, pptr[1],
0, 0,
IP_VS_CONN_F_BYPASS,
NULL);
......@@ -473,7 +506,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* created, the TCP RST packet cannot be sent, instead that
* ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
*/
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
skb->dev);
else
#endif
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
return NF_DROP;
}
......@@ -512,6 +552,14 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}
#ifdef CONFIG_IP_VS_IPV6
static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
{
/* TODO IPv6: Find out what to do here for IPv6 */
return 0;
}
#endif
/*
* Packet has been made sufficiently writable in caller
* - inout: 1=in->out, 0=out->in
......@@ -526,14 +574,14 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct iphdr *ciph = (struct iphdr *)(icmph + 1);
if (inout) {
iph->saddr = cp->vaddr;
iph->saddr = cp->vaddr.ip;
ip_send_check(iph);
ciph->daddr = cp->vaddr;
ciph->daddr = cp->vaddr.ip;
ip_send_check(ciph);
} else {
iph->daddr = cp->daddr;
iph->daddr = cp->daddr.ip;
ip_send_check(iph);
ciph->saddr = cp->daddr;
ciph->saddr = cp->daddr.ip;
ip_send_check(ciph);
}
......@@ -560,21 +608,112 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
"Forwarding altered incoming ICMP");
}
#ifdef CONFIG_IP_VS_IPV6
void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int inout)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
unsigned int icmp_offset = sizeof(struct ipv6hdr);
struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) +
icmp_offset);
struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1);
if (inout) {
iph->saddr = cp->vaddr.in6;
ciph->daddr = cp->vaddr.in6;
} else {
iph->daddr = cp->daddr.in6;
ciph->saddr = cp->daddr.in6;
}
/* the TCP/UDP port */
if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
if (inout)
ports[1] = cp->vport;
else
ports[0] = cp->dport;
}
/* And finally the ICMP checksum */
icmph->icmp6_cksum = 0;
/* TODO IPv6: is this correct for ICMPv6? */
ip_vs_checksum_complete(skb, icmp_offset);
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (inout)
IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
"Forwarding altered outgoing ICMPv6");
else
IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
"Forwarding altered incoming ICMPv6");
}
#endif
/* Handle relevant response ICMP messages - forward to the right
* destination host. Used for NAT and local client.
*/
static int handle_response_icmp(int af, struct sk_buff *skb,
union nf_inet_addr *snet,
__u8 protocol, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp,
unsigned int offset, unsigned int ihl)
{
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
IP_VS_ERR("shouldn't reach here, because the box is on the "
"half connection in the tun/dr module.\n");
}
/* Ensure the checksum is correct */
if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
/* Failed checksum! */
IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n",
IP_VS_DBG_ADDR(af, snet));
goto out;
}
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
offset += 2 * sizeof(__u16);
if (!skb_make_writable(skb, offset))
goto out;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_nat_icmp_v6(skb, pp, cp, 1);
else
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);
skb->ipvs_property = 1;
verdict = NF_ACCEPT;
out:
__ip_vs_conn_put(cp);
return verdict;
}
/*
* Handle ICMP messages in the inside-to-outside direction (outgoing).
* Find any that might be relevant, check against existing connections,
* forward to the right destination host if relevant.
* Find any that might be relevant, check against existing connections.
* Currently handles error types - unreachable, quench, ttl exceeded.
* (Only used in VS/NAT)
*/
static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
{
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset, ihl, verdict;
unsigned int offset, ihl;
union nf_inet_addr snet;
*related = 1;
......@@ -627,102 +766,231 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
offset += cih->ihl * 4;
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_out_get(skb, pp, cih, offset, 1);
cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
verdict = NF_DROP;
snet.ip = iph->saddr;
return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
pp, offset, ihl);
}
if (IP_VS_FWD_METHOD(cp) != 0) {
IP_VS_ERR("shouldn't reach here, because the box is on the "
"half connection in the tun/dr module.\n");
#ifdef CONFIG_IP_VS_IPV6
static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
{
struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
struct ipv6hdr _ciph, *cih; /* The ip header contained
within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset;
union nf_inet_addr snet;
*related = 1;
/* reassemble IP fragments */
if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
}
/* Ensure the checksum is correct */
if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
/* Failed checksum! */
IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
NIPQUAD(iph->saddr));
goto out;
iph = ipv6_hdr(skb);
offset = sizeof(struct ipv6hdr);
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
if (ic == NULL)
return NF_DROP;
IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
ic->icmp6_type, ntohs(icmpv6_id(ic)),
NIP6(iph->saddr), NIP6(iph->daddr));
/*
* Work through seeing if this is for us.
* These checks are supposed to be in an order that means easy
* things are checked first to speed up processing.... however
* this means that some packets will manage to get a long way
* down this stack and then be rejected, but that's life.
*/
if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
(ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
(ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
*related = 0;
return NF_ACCEPT;
}
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
if (!skb_make_writable(skb, offset))
goto out;
/* Now find the contained IP header */
offset += sizeof(_icmph);
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
ip_vs_nat_icmp(skb, pp, cp, 1);
pp = ip_vs_proto_get(cih->nexthdr);
if (!pp)
return NF_ACCEPT;
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);
/* Is the embedded protocol header present? */
/* TODO: we don't support fragmentation at the moment anyways */
if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
return NF_ACCEPT;
skb->ipvs_property = 1;
verdict = NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
out:
__ip_vs_conn_put(cp);
offset += sizeof(struct ipv6hdr);
return verdict;
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
ipv6_addr_copy(&snet.in6, &iph->saddr);
return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
pp, offset, sizeof(struct ipv6hdr));
}
#endif
static inline int is_tcp_reset(const struct sk_buff *skb)
static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
{
struct tcphdr _tcph, *th;
th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
return th->rst;
}
/* Handle response packets: rewrite addresses and send away...
* Used for NAT and local client.
*/
static unsigned int
handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int ihl)
{
IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl))
goto drop;
/* mangle the packet */
if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
goto drop;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ipv6_hdr(skb)->saddr = cp->vaddr.in6;
else
#endif
{
ip_hdr(skb)->saddr = cp->vaddr.ip;
ip_send_check(ip_hdr(skb));
}
/* For policy routing, packets originating from this
* machine itself may be routed differently to packets
* passing through. We want this packet to be routed as
* if it came from this machine itself. So re-compute
* the routing information.
*/
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (ip6_route_me_harder(skb) != 0)
goto drop;
} else
#endif
if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
ip_vs_conn_put(cp);
skb->ipvs_property = 1;
LeaveFunction(11);
return NF_ACCEPT;
drop:
ip_vs_conn_put(cp);
kfree_skb(skb);
return NF_STOLEN;
}
/*
* It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
* Check if outgoing packet belongs to the established ip_vs_conn,
* rewrite addresses of the packet and send it on its way...
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct iphdr *iph;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
int ihl;
int af;
EnterFunction(11);
af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
if (skb->ipvs_property)
return NF_ACCEPT;
iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_out_icmp(skb, &related);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related, verdict = ip_vs_out_icmp_v6(skb, &related);
if (related)
return verdict;
iph = ip_hdr(skb);
}
if (related)
return verdict;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
} else
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_out_icmp(skb, &related);
pp = ip_vs_proto_get(iph->protocol);
if (related)
return verdict;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
pp = ip_vs_proto_get(iph.protocol);
if (unlikely(!pp))
return NF_ACCEPT;
/* reassemble IP fragments */
if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
iph = ip_hdr(skb);
}
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related, verdict = ip_vs_out_icmp_v6(skb, &related);
if (related)
return verdict;
ihl = iph->ihl << 2;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
} else
#endif
if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
/*
* Check if the packet belongs to an existing entry
*/
cp = pp->conn_out_get(skb, pp, iph, ihl, 0);
cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
if (unlikely(!cp)) {
if (sysctl_ip_vs_nat_icmp_send &&
......@@ -730,21 +998,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
pp->protocol == IPPROTO_UDP)) {
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, ihl,
pptr = skb_header_pointer(skb, iph.len,
sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(iph->protocol,
iph->saddr, pptr[0])) {
if (ip_vs_lookup_real_service(af, iph.protocol,
&iph.saddr,
pptr[0])) {
/*
* Notify the real server: there is no
* existing entry if it is not RST
* packet or not TCP packet.
*/
if (iph->protocol != IPPROTO_TCP
|| !is_tcp_reset(skb)) {
icmp_send(skb,ICMP_DEST_UNREACH,
ICMP_PORT_UNREACH, 0);
if (iph.protocol != IPPROTO_TCP
|| !is_tcp_reset(skb, iph.len)) {
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
icmpv6_send(skb,
ICMPV6_DEST_UNREACH,
ICMPV6_PORT_UNREACH,
0, skb->dev);
else
#endif
icmp_send(skb,
ICMP_DEST_UNREACH,
ICMP_PORT_UNREACH, 0);
return NF_DROP;
}
}
......@@ -754,41 +1032,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
return NF_ACCEPT;
}
IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl))
goto drop;
/* mangle the packet */
if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
goto drop;
ip_hdr(skb)->saddr = cp->vaddr;
ip_send_check(ip_hdr(skb));
/* For policy routing, packets originating from this
* machine itself may be routed differently to packets
* passing through. We want this packet to be routed as
* if it came from this machine itself. So re-compute
* the routing information.
*/
if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
ip_vs_conn_put(cp);
skb->ipvs_property = 1;
LeaveFunction(11);
return NF_ACCEPT;
drop:
ip_vs_conn_put(cp);
kfree_skb(skb);
return NF_STOLEN;
return handle_response(af, skb, pp, cp, iph.len);
}
......@@ -804,9 +1048,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset, ihl, verdict;
union nf_inet_addr snet;
*related = 1;
......@@ -860,10 +1106,20 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
offset += cih->ihl * 4;
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_in_get(skb, pp, cih, offset, 1);
if (!cp)
cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
if (cp) {
snet.ip = iph->saddr;
return handle_response_icmp(AF_INET, skb, &snet,
cih->protocol, cp, pp,
offset, ihl);
}
return NF_ACCEPT;
}
verdict = NF_DROP;
......@@ -888,6 +1144,105 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
return verdict;
}
#ifdef CONFIG_IP_VS_IPV6
static int
ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
{
struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
struct ipv6hdr _ciph, *cih; /* The ip header contained
within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
unsigned int offset, verdict;
union nf_inet_addr snet;
*related = 1;
/* reassemble IP fragments */
if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
IP_DEFRAG_VS_IN :
IP_DEFRAG_VS_FWD))
return NF_STOLEN;
}
iph = ipv6_hdr(skb);
offset = sizeof(struct ipv6hdr);
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
if (ic == NULL)
return NF_DROP;
IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n",
ic->icmp6_type, ntohs(icmpv6_id(ic)),
NIP6(iph->saddr), NIP6(iph->daddr));
/*
* Work through seeing if this is for us.
* These checks are supposed to be in an order that means easy
* things are checked first to speed up processing.... however
* this means that some packets will manage to get a long way
* down this stack and then be rejected, but that's life.
*/
if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) &&
(ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
(ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
*related = 0;
return NF_ACCEPT;
}
/* Now find the contained IP header */
offset += sizeof(_icmph);
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
pp = ip_vs_proto_get(cih->nexthdr);
if (!pp)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
/* TODO: we don't support fragmentation at the moment anyways */
if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
return NF_ACCEPT;
IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
offset += sizeof(struct ipv6hdr);
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
if (cp) {
ipv6_addr_copy(&snet.in6, &iph->saddr);
return handle_response_icmp(AF_INET6, skb, &snet,
cih->nexthdr,
cp, pp, offset,
sizeof(struct ipv6hdr));
}
return NF_ACCEPT;
}
verdict = NF_DROP;
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
/* do not touch skb anymore */
__ip_vs_conn_put(cp);
return verdict;
}
#endif
/*
* Check if it's for virtual services, look it up,
* and send it on its way...
......@@ -897,50 +1252,54 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct iphdr *iph;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
int ret, restart;
int ihl;
int ret, restart, af;
af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
/*
* Big tappo: only PACKET_HOST (neither loopback nor mcasts)
* ... don't know why 1st test DOES NOT include 2nd (?)
* Big tappo: only PACKET_HOST, including loopback for local client
* Don't handle local packets on IPv6 for now
*/
if (unlikely(skb->pkt_type != PACKET_HOST
|| skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
skb->pkt_type,
ip_hdr(skb)->protocol,
NIPQUAD(ip_hdr(skb)->daddr));
if (unlikely(skb->pkt_type != PACKET_HOST)) {
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
skb->pkt_type,
iph.protocol,
IP_VS_DBG_ADDR(af, &iph.daddr));
return NF_ACCEPT;
}
iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
if (related)
return verdict;
iph = ip_hdr(skb);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
/* Protocol supported? */
pp = ip_vs_proto_get(iph->protocol);
pp = ip_vs_proto_get(iph.protocol);
if (unlikely(!pp))
return NF_ACCEPT;
ihl = iph->ihl << 2;
/*
* Check if the packet belongs to an existing connection entry
*/
cp = pp->conn_in_get(skb, pp, iph, ihl, 0);
cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
if (unlikely(!cp)) {
int v;
if (!pp->conn_schedule(skb, pp, &v, &cp))
/* For local client packets, it could be a response */
cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
if (cp)
return handle_response(af, skb, pp, cp, iph.len);
if (!pp->conn_schedule(af, skb, pp, &v, &cp))
return v;
}
......@@ -984,7 +1343,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
* encorage the standby servers to update the connections timeout
*/
atomic_inc(&cp->in_pkts);
if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
if (af == AF_INET &&
(ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
......@@ -1023,6 +1383,21 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
return ip_vs_in_icmp(skb, &r, hooknum);
}
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
int r;
if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
return NF_ACCEPT;
return ip_vs_in_icmp_v6(skb, &r, hooknum);
}
#endif
static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, forward packet through VS/DR, VS/TUN,
......@@ -1060,6 +1435,43 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC-1,
},
#ifdef CONFIG_IP_VS_IPV6
/* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */
{
.hook = ip_vs_in,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_LOCAL_IN,
.priority = 100,
},
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_out,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_FORWARD,
.priority = 100,
},
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
.hook = ip_vs_forward_icmp_v6,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* Before the netfilter connection tracking, exit from POST_ROUTING */
{
.hook = ip_vs_post_routing,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC-1,
},
#endif
};
......
......@@ -35,6 +35,10 @@
#include <net/net_namespace.h>
#include <net/ip.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <net/ip6_route.h>
#endif
#include <net/route.h>
#include <net/sock.h>
#include <net/genetlink.h>
......@@ -91,6 +95,26 @@ int ip_vs_get_debug_level(void)
}
#endif
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
struct rt6_info *rt;
struct flowi fl = {
.oif = 0,
.nl_u = {
.ip6_u = {
.daddr = *addr,
.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
};
rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
return 1;
return 0;
}
#endif
/*
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
......@@ -282,11 +306,19 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
* Returns hash value for virtual service
*/
static __inline__ unsigned
ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
__be16 port)
{
register unsigned porth = ntohs(port);
__be32 addr_fold = addr->ip;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
& IP_VS_SVC_TAB_MASK;
}
......@@ -317,7 +349,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
/*
* Hash it by <protocol,addr,port> in ip_vs_svc_table
*/
hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
svc->port);
list_add(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
......@@ -363,17 +396,19 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
/*
* Get service by {proto,addr,port} in the service table.
*/
static __inline__ struct ip_vs_service *
__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
static inline struct ip_vs_service *
__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
__be16 vport)
{
unsigned hash;
struct ip_vs_service *svc;
/* Check for "full" addressed entries */
hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
if ((svc->addr == vaddr)
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
&& (svc->protocol == protocol)) {
/* HIT */
......@@ -389,7 +424,8 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
/*
* Get service by {fwmark} in the service table.
*/
static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
static inline struct ip_vs_service *
__ip_vs_svc_fwm_get(int af, __u32 fwmark)
{
unsigned hash;
struct ip_vs_service *svc;
......@@ -398,7 +434,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
hash = ip_vs_svc_fwm_hashkey(fwmark);
list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
if (svc->fwmark == fwmark) {
if (svc->fwmark == fwmark && svc->af == af) {
/* HIT */
atomic_inc(&svc->usecnt);
return svc;
......@@ -409,7 +445,8 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
}
struct ip_vs_service *
ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
......@@ -418,14 +455,14 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
/*
* Check the table hashed by fwmark first
*/
if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
goto out;
/*
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
svc = __ip_vs_service_get(protocol, vaddr, vport);
svc = __ip_vs_service_get(af, protocol, vaddr, vport);
if (svc == NULL
&& protocol == IPPROTO_TCP
......@@ -435,7 +472,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
}
if (svc == NULL
......@@ -443,16 +480,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
/*
* Check if the catch-all port (port zero) exists
*/
svc = __ip_vs_service_get(protocol, vaddr, 0);
svc = __ip_vs_service_get(af, protocol, vaddr, 0);
}
out:
read_unlock(&__ip_vs_svc_lock);
IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
fwmark, ip_vs_proto_name(protocol),
NIPQUAD(vaddr), ntohs(vport),
svc?"hit":"not hit");
IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
fwmark, ip_vs_proto_name(protocol),
IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
svc ? "hit" : "not hit");
return svc;
}
......@@ -479,11 +516,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
/*
* Returns hash value for real service
*/
static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
static inline unsigned ip_vs_rs_hashkey(int af,
const union nf_inet_addr *addr,
__be16 port)
{
register unsigned porth = ntohs(port);
__be32 addr_fold = addr->ip;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
& IP_VS_RTAB_MASK;
}
......@@ -503,7 +549,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
* Hash by proto,addr,port,
* which are the parameters of the real service.
*/
hash = ip_vs_rs_hashkey(dest->addr, dest->port);
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
list_add(&dest->d_list, &ip_vs_rtable[hash]);
return 1;
......@@ -530,7 +577,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
* Lookup real service by <proto,addr,port> in the real service table.
*/
struct ip_vs_dest *
ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
ip_vs_lookup_real_service(int af, __u16 protocol,
const union nf_inet_addr *daddr,
__be16 dport)
{
unsigned hash;
struct ip_vs_dest *dest;
......@@ -539,11 +588,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
* Check for "full" addressed entries
* Return the first found entry
*/
hash = ip_vs_rs_hashkey(daddr, dport);
hash = ip_vs_rs_hashkey(af, daddr, dport);
read_lock(&__ip_vs_rs_lock);
list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
if ((dest->addr == daddr)
if ((dest->af == af)
&& ip_vs_addr_equal(af, &dest->addr, daddr)
&& (dest->port == dport)
&& ((dest->protocol == protocol) ||
dest->vfwmark)) {
......@@ -561,7 +611,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
* Lookup destination by {addr,port} in the given service
*/
static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
__be16 dport)
{
struct ip_vs_dest *dest;
......@@ -569,7 +620,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
* Find the destination for the given service
*/
list_for_each_entry(dest, &svc->destinations, n_list) {
if ((dest->addr == daddr) && (dest->port == dport)) {
if ((dest->af == svc->af)
&& ip_vs_addr_equal(svc->af, &dest->addr, daddr)
&& (dest->port == dport)) {
/* HIT */
return dest;
}
......@@ -588,13 +641,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
* ip_vs_lookup_real_service() looked promissing, but
* seems not working as expected.
*/
struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
__be32 vaddr, __be16 vport, __u16 protocol)
struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
__be16 vport, __u16 protocol)
{
struct ip_vs_dest *dest;
struct ip_vs_service *svc;
svc = ip_vs_service_get(0, protocol, vaddr, vport);
svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
if (!svc)
return NULL;
dest = ip_vs_lookup_dest(svc, daddr, dport);
......@@ -615,7 +670,8 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
* scheduling.
*/
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
__be16 dport)
{
struct ip_vs_dest *dest, *nxt;
......@@ -623,17 +679,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
* Find the destination in trash
*/
list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
NIPQUAD(dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
if (dest->addr == daddr &&
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
if (dest->af == svc->af &&
ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
dest->port == dport &&
dest->vfwmark == svc->fwmark &&
dest->protocol == svc->protocol &&
(svc->fwmark ||
(dest->vaddr == svc->addr &&
(ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
dest->vport == svc->port))) {
/* HIT */
return dest;
......@@ -643,10 +701,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
* Try to purge the destination from trash if not referenced
*/
if (atomic_read(&dest->refcnt) == 1) {
IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
"from trash\n",
dest->vfwmark,
NIPQUAD(dest->addr), ntohs(dest->port));
IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
"from trash\n",
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->addr),
ntohs(dest->port));
list_del(&dest->n_list);
ip_vs_dst_reset(dest);
__ip_vs_unbind_svc(dest);
......@@ -685,18 +744,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
stats->conns = 0;
stats->inpkts = 0;
stats->outpkts = 0;
stats->inbytes = 0;
stats->outbytes = 0;
stats->cps = 0;
stats->inpps = 0;
stats->outpps = 0;
stats->inbps = 0;
stats->outbps = 0;
memset(&stats->ustats, 0, sizeof(stats->ustats));
ip_vs_zero_estimator(stats);
spin_unlock_bh(&stats->lock);
......@@ -707,7 +755,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
*/
static void
__ip_vs_update_dest(struct ip_vs_service *svc,
struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
{
int conn_flags;
......@@ -716,10 +764,18 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
/* check if local node and update the flags */
if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
| IP_VS_CONN_F_LOCALNODE;
}
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) {
if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
| IP_VS_CONN_F_LOCALNODE;
}
} else
#endif
if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
| IP_VS_CONN_F_LOCALNODE;
}
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
......@@ -760,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
* Create a destination for the given service
*/
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
struct ip_vs_dest **dest_p)
{
struct ip_vs_dest *dest;
......@@ -768,9 +824,20 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
EnterFunction(2);
atype = inet_addr_type(&init_net, udest->addr);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) {
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
!__ip_vs_addr_is_local_v6(&udest->addr.in6))
return -EINVAL;
} else
#endif
{
atype = inet_addr_type(&init_net, udest->addr.ip);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
}
dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
if (dest == NULL) {
......@@ -778,11 +845,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
return -ENOMEM;
}
dest->af = svc->af;
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
dest->vport = svc->port;
dest->vfwmark = svc->fwmark;
dest->addr = udest->addr;
ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
dest->port = udest->port;
atomic_set(&dest->activeconns, 0);
......@@ -807,10 +875,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
* Add a destination into an existing service
*/
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
__be32 daddr = udest->addr;
union nf_inet_addr daddr;
__be16 dport = udest->port;
int ret;
......@@ -827,10 +895,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
return -ERANGE;
}
ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
/*
* Check if the dest already exists in the list
*/
dest = ip_vs_lookup_dest(svc, daddr, dport);
dest = ip_vs_lookup_dest(svc, &daddr, dport);
if (dest != NULL) {
IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
return -EEXIST;
......@@ -840,15 +911,17 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
* Check if the dest already exists in the trash and
* is from the same service
*/
dest = ip_vs_trash_get_dest(svc, daddr, dport);
dest = ip_vs_trash_get_dest(svc, &daddr, dport);
if (dest != NULL) {
IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
"dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
NIPQUAD(daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
NIPQUAD(dest->vaddr),
ntohs(dest->vport));
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
ntohs(dest->vport));
__ip_vs_update_dest(svc, dest, udest);
/*
......@@ -915,10 +988,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
* Edit a destination in the given service
*/
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
__be32 daddr = udest->addr;
union nf_inet_addr daddr;
__be16 dport = udest->port;
EnterFunction(2);
......@@ -934,10 +1007,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
return -ERANGE;
}
ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
/*
* Lookup the destination list
*/
dest = ip_vs_lookup_dest(svc, daddr, dport);
dest = ip_vs_lookup_dest(svc, &daddr, dport);
if (dest == NULL) {
IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
return -ENOENT;
......@@ -991,10 +1067,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
atomic_dec(&dest->svc->refcnt);
kfree(dest);
} else {
IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
"dest->refcnt=%d\n",
NIPQUAD(dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
"dest->refcnt=%d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
list_add(&dest->n_list, &ip_vs_dest_trash);
atomic_inc(&dest->refcnt);
}
......@@ -1028,15 +1105,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
* Delete a destination server in the given service
*/
static int
ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
__be32 daddr = udest->addr;
__be16 dport = udest->port;
EnterFunction(2);
dest = ip_vs_lookup_dest(svc, daddr, dport);
dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
if (dest == NULL) {
IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
return -ENOENT;
......@@ -1071,7 +1148,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
* Add a service into the service hash table
*/
static int
ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
ip_vs_add_service(struct ip_vs_service_user_kern *u,
struct ip_vs_service **svc_p)
{
int ret = 0;
struct ip_vs_scheduler *sched = NULL;
......@@ -1089,6 +1167,19 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
goto out_mod_dec;
}
#ifdef CONFIG_IP_VS_IPV6
if (u->af == AF_INET6) {
if (!sched->supports_ipv6) {
ret = -EAFNOSUPPORT;
goto out_err;
}
if ((u->netmask < 1) || (u->netmask > 128)) {
ret = -EINVAL;
goto out_err;
}
}
#endif
svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
if (svc == NULL) {
IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
......@@ -1100,8 +1191,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
atomic_set(&svc->usecnt, 1);
atomic_set(&svc->refcnt, 0);
svc->af = u->af;
svc->protocol = u->protocol;
svc->addr = u->addr;
ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
svc->port = u->port;
svc->fwmark = u->fwmark;
svc->flags = u->flags;
......@@ -1125,7 +1217,10 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
atomic_inc(&ip_vs_nullsvc_counter);
ip_vs_new_estimator(&svc->stats);
ip_vs_num_services++;
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
ip_vs_num_services++;
/* Hash the service into the service table */
write_lock_bh(&__ip_vs_svc_lock);
......@@ -1160,7 +1255,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
* Edit a service and bind it with a new scheduler
*/
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
struct ip_vs_scheduler *sched, *old_sched;
int ret = 0;
......@@ -1176,6 +1271,19 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
}
old_sched = sched;
#ifdef CONFIG_IP_VS_IPV6
if (u->af == AF_INET6) {
if (!sched->supports_ipv6) {
ret = -EAFNOSUPPORT;
goto out;
}
if ((u->netmask < 1) || (u->netmask > 128)) {
ret = -EINVAL;
goto out;
}
}
#endif
write_lock_bh(&__ip_vs_svc_lock);
/*
......@@ -1240,7 +1348,10 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
ip_vs_num_services--;
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
ip_vs_num_services--;
ip_vs_kill_estimator(&svc->stats);
/* Unbind scheduler */
......@@ -1748,15 +1859,25 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
if (iter->table == ip_vs_svc_table)
seq_printf(seq, "%s %08X:%04X %s ",
ip_vs_proto_name(svc->protocol),
ntohl(svc->addr),
ntohs(svc->port),
svc->scheduler->name);
else
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
ip_vs_proto_name(svc->protocol),
NIP6(svc->addr.in6),
ntohs(svc->port),
svc->scheduler->name);
else
#endif
seq_printf(seq, "%s %08X:%04X %s ",
ip_vs_proto_name(svc->protocol),
ntohl(svc->addr.ip),
ntohs(svc->port),
svc->scheduler->name);
} else {
seq_printf(seq, "FWM %08X %s ",
svc->fwmark, svc->scheduler->name);
}
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
seq_printf(seq, "persistent %d %08X\n",
......@@ -1766,13 +1887,29 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
seq_putc(seq, '\n');
list_for_each_entry(dest, &svc->destinations, n_list) {
seq_printf(seq,
" -> %08X:%04X %-7s %-6d %-10d %-10d\n",
ntohl(dest->addr), ntohs(dest->port),
ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
atomic_read(&dest->weight),
atomic_read(&dest->activeconns),
atomic_read(&dest->inactconns));
#ifdef CONFIG_IP_VS_IPV6
if (dest->af == AF_INET6)
seq_printf(seq,
" -> [" NIP6_FMT "]:%04X"
" %-7s %-6d %-10d %-10d\n",
NIP6(dest->addr.in6),
ntohs(dest->port),
ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
atomic_read(&dest->weight),
atomic_read(&dest->activeconns),
atomic_read(&dest->inactconns));
else
#endif
seq_printf(seq,
" -> %08X:%04X "
"%-7s %-6d %-10d %-10d\n",
ntohl(dest->addr.ip),
ntohs(dest->port),
ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
atomic_read(&dest->weight),
atomic_read(&dest->activeconns),
atomic_read(&dest->inactconns));
}
}
return 0;
......@@ -1816,20 +1953,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
" Conns Packets Packets Bytes Bytes\n");
spin_lock_bh(&ip_vs_stats.lock);
seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
ip_vs_stats.inpkts, ip_vs_stats.outpkts,
(unsigned long long) ip_vs_stats.inbytes,
(unsigned long long) ip_vs_stats.outbytes);
seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
(unsigned long long) ip_vs_stats.ustats.inbytes,
(unsigned long long) ip_vs_stats.ustats.outbytes);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq,"%8X %8X %8X %16X %16X\n",
ip_vs_stats.cps,
ip_vs_stats.inpps,
ip_vs_stats.outpps,
ip_vs_stats.inbps,
ip_vs_stats.outbps);
ip_vs_stats.ustats.cps,
ip_vs_stats.ustats.inpps,
ip_vs_stats.ustats.outpps,
ip_vs_stats.ustats.inbps,
ip_vs_stats.ustats.outbps);
spin_unlock_bh(&ip_vs_stats.lock);
return 0;
......@@ -1904,14 +2041,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
[SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
};
static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
struct ip_vs_service_user *usvc_compat)
{
usvc->af = AF_INET;
usvc->protocol = usvc_compat->protocol;
usvc->addr.ip = usvc_compat->addr;
usvc->port = usvc_compat->port;
usvc->fwmark = usvc_compat->fwmark;
/* Deep copy of sched_name is not needed here */
usvc->sched_name = usvc_compat->sched_name;
usvc->flags = usvc_compat->flags;
usvc->timeout = usvc_compat->timeout;
usvc->netmask = usvc_compat->netmask;
}
static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
struct ip_vs_dest_user *udest_compat)
{
udest->addr.ip = udest_compat->addr;
udest->port = udest_compat->port;
udest->conn_flags = udest_compat->conn_flags;
udest->weight = udest_compat->weight;
udest->u_threshold = udest_compat->u_threshold;
udest->l_threshold = udest_compat->l_threshold;
}
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
int ret;
unsigned char arg[MAX_ARG_LEN];
struct ip_vs_service_user *usvc;
struct ip_vs_service_user *usvc_compat;
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
struct ip_vs_dest_user *udest;
struct ip_vs_dest_user *udest_compat;
struct ip_vs_dest_user_kern udest;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
......@@ -1951,35 +2118,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
goto out_unlock;
}
usvc = (struct ip_vs_service_user *)arg;
udest = (struct ip_vs_dest_user *)(usvc + 1);
usvc_compat = (struct ip_vs_service_user *)arg;
udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
/* We only use the new structs internally, so copy userspace compat
* structs to extended internal versions */
ip_vs_copy_usvc_compat(&usvc, usvc_compat);
ip_vs_copy_udest_compat(&udest, udest_compat);
if (cmd == IP_VS_SO_SET_ZERO) {
/* if no service address is set, zero counters in all */
if (!usvc->fwmark && !usvc->addr && !usvc->port) {
if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
ret = ip_vs_zero_all();
goto out_unlock;
}
}
/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
usvc->protocol, NIPQUAD(usvc->addr),
ntohs(usvc->port), usvc->sched_name);
usvc.protocol, NIPQUAD(usvc.addr.ip),
ntohs(usvc.port), usvc.sched_name);
ret = -EFAULT;
goto out_unlock;
}
/* Lookup the exact service by <protocol, addr, port> or fwmark */
if (usvc->fwmark == 0)
svc = __ip_vs_service_get(usvc->protocol,
usvc->addr, usvc->port);
if (usvc.fwmark == 0)
svc = __ip_vs_service_get(usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
svc = __ip_vs_svc_fwm_get(usvc->fwmark);
svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
if (cmd != IP_VS_SO_SET_ADD
&& (svc == NULL || svc->protocol != usvc->protocol)) {
&& (svc == NULL || svc->protocol != usvc.protocol)) {
ret = -ESRCH;
goto out_unlock;
}
......@@ -1989,10 +2161,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (svc != NULL)
ret = -EEXIST;
else
ret = ip_vs_add_service(usvc, &svc);
ret = ip_vs_add_service(&usvc, &svc);
break;
case IP_VS_SO_SET_EDIT:
ret = ip_vs_edit_service(svc, usvc);
ret = ip_vs_edit_service(svc, &usvc);
break;
case IP_VS_SO_SET_DEL:
ret = ip_vs_del_service(svc);
......@@ -2003,13 +2175,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
ret = ip_vs_zero_service(svc);
break;
case IP_VS_SO_SET_ADDDEST:
ret = ip_vs_add_dest(svc, udest);
ret = ip_vs_add_dest(svc, &udest);
break;
case IP_VS_SO_SET_EDITDEST:
ret = ip_vs_edit_dest(svc, udest);
ret = ip_vs_edit_dest(svc, &udest);
break;
case IP_VS_SO_SET_DELDEST:
ret = ip_vs_del_dest(svc, udest);
ret = ip_vs_del_dest(svc, &udest);
break;
default:
ret = -EINVAL;
......@@ -2032,7 +2204,7 @@ static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
spin_lock_bh(&src->lock);
memcpy(dst, src, (char*)&src->lock - (char*)src);
memcpy(dst, &src->ustats, sizeof(*dst));
spin_unlock_bh(&src->lock);
}
......@@ -2040,7 +2212,7 @@ static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
dst->protocol = src->protocol;
dst->addr = src->addr;
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
......@@ -2062,6 +2234,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET)
continue;
if (count >= get->num_services)
goto out;
memset(&entry, 0, sizeof(entry));
......@@ -2077,6 +2253,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET)
continue;
if (count >= get->num_services)
goto out;
memset(&entry, 0, sizeof(entry));
......@@ -2098,13 +2278,15 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
struct ip_vs_get_dests __user *uptr)
{
struct ip_vs_service *svc;
union nf_inet_addr addr = { .ip = get->addr };
int ret = 0;
if (get->fwmark)
svc = __ip_vs_svc_fwm_get(get->fwmark);
svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
else
svc = __ip_vs_service_get(get->protocol,
get->addr, get->port);
svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
get->port);
if (svc) {
int count = 0;
struct ip_vs_dest *dest;
......@@ -2114,7 +2296,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
if (count >= get->num_dests)
break;
entry.addr = dest->addr;
entry.addr = dest->addr.ip;
entry.port = dest->port;
entry.conn_flags = atomic_read(&dest->conn_flags);
entry.weight = atomic_read(&dest->weight);
......@@ -2239,13 +2421,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_service_entry *entry;
struct ip_vs_service *svc;
union nf_inet_addr addr;
entry = (struct ip_vs_service_entry *)arg;
addr.ip = entry->addr;
if (entry->fwmark)
svc = __ip_vs_svc_fwm_get(entry->fwmark);
svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
else
svc = __ip_vs_service_get(entry->protocol,
entry->addr, entry->port);
svc = __ip_vs_service_get(AF_INET, entry->protocol,
&addr, entry->port);
if (svc) {
ip_vs_copy_service(entry, svc);
if (copy_to_user(user, entry, sizeof(*entry)) != 0)
......@@ -2396,16 +2580,16 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
spin_lock_bh(&stats->lock);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
spin_unlock_bh(&stats->lock);
......@@ -2430,7 +2614,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
if (!nl_service)
return -EMSGSIZE;
NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
if (svc->fwmark) {
NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
......@@ -2516,7 +2700,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
return skb->len;
}
static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry)
{
struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
......@@ -2536,8 +2720,12 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
return -EINVAL;
/* For now, only support IPv4 */
if (nla_get_u16(nla_af) != AF_INET)
usvc->af = nla_get_u16(nla_af);
#ifdef CONFIG_IP_VS_IPV6
if (usvc->af != AF_INET && usvc->af != AF_INET6)
#else
if (usvc->af != AF_INET)
#endif
return -EAFNOSUPPORT;
if (nla_fwmark) {
......@@ -2569,10 +2757,10 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
/* prefill flags from service if it already exists */
if (usvc->fwmark)
svc = __ip_vs_svc_fwm_get(usvc->fwmark);
svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
else
svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
usvc->port);
svc = __ip_vs_service_get(usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
if (svc) {
usvc->flags = svc->flags;
ip_vs_service_put(svc);
......@@ -2582,9 +2770,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
/* set new flags from userland */
usvc->flags = (usvc->flags & ~flags.mask) |
(flags.flags & flags.mask);
strlcpy(usvc->sched_name, nla_data(nla_sched),
sizeof(usvc->sched_name));
usvc->sched_name = nla_data(nla_sched);
usvc->timeout = nla_get_u32(nla_timeout);
usvc->netmask = nla_get_u32(nla_netmask);
}
......@@ -2594,7 +2780,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
{
struct ip_vs_service_user usvc;
struct ip_vs_service_user_kern usvc;
int ret;
ret = ip_vs_genl_parse_service(&usvc, nla, 0);
......@@ -2602,10 +2788,10 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
return ERR_PTR(ret);
if (usvc.fwmark)
return __ip_vs_svc_fwm_get(usvc.fwmark);
return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
else
return __ip_vs_service_get(usvc.protocol, usvc.addr,
usvc.port);
return __ip_vs_service_get(usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
}
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
......@@ -2704,7 +2890,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
return skb->len;
}
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
struct nlattr *nla, int full_entry)
{
struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
......@@ -2860,8 +3046,8 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_service *svc = NULL;
struct ip_vs_service_user usvc;
struct ip_vs_dest_user udest;
struct ip_vs_service_user_kern usvc;
struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
......@@ -2913,9 +3099,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
/* Lookup the exact service by <protocol, addr, port> or fwmark */
if (usvc.fwmark == 0)
svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
svc = __ip_vs_service_get(usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
svc = __ip_vs_svc_fwm_get(usvc.fwmark);
svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
/* Unless we're adding a new service, the service must already exist */
if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
......
......@@ -218,7 +218,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
NIPQUAD(dest->addr.ip),
ntohs(dest->port));
return dest;
......@@ -234,6 +234,9 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 0,
#endif
.init_service = ip_vs_dh_init_svc,
.done_service = ip_vs_dh_done_svc,
.update_service = ip_vs_dh_update_svc,
......
......@@ -65,37 +65,37 @@ static void estimation_timer(unsigned long arg)
s = container_of(e, struct ip_vs_stats, est);
spin_lock(&s->lock);
n_conns = s->conns;
n_inpkts = s->inpkts;
n_outpkts = s->outpkts;
n_inbytes = s->inbytes;
n_outbytes = s->outbytes;
n_conns = s->ustats.conns;
n_inpkts = s->ustats.inpkts;
n_outpkts = s->ustats.outpkts;
n_inbytes = s->ustats.inbytes;
n_outbytes = s->ustats.outbytes;
/* scaled by 2^10, but divided 2 seconds */
rate = (n_conns - e->last_conns)<<9;
e->last_conns = n_conns;
e->cps += ((long)rate - (long)e->cps)>>2;
s->cps = (e->cps+0x1FF)>>10;
s->ustats.cps = (e->cps+0x1FF)>>10;
rate = (n_inpkts - e->last_inpkts)<<9;
e->last_inpkts = n_inpkts;
e->inpps += ((long)rate - (long)e->inpps)>>2;
s->inpps = (e->inpps+0x1FF)>>10;
s->ustats.inpps = (e->inpps+0x1FF)>>10;
rate = (n_outpkts - e->last_outpkts)<<9;
e->last_outpkts = n_outpkts;
e->outpps += ((long)rate - (long)e->outpps)>>2;
s->outpps = (e->outpps+0x1FF)>>10;
s->ustats.outpps = (e->outpps+0x1FF)>>10;
rate = (n_inbytes - e->last_inbytes)<<4;
e->last_inbytes = n_inbytes;
e->inbps += ((long)rate - (long)e->inbps)>>2;
s->inbps = (e->inbps+0xF)>>5;
s->ustats.inbps = (e->inbps+0xF)>>5;
rate = (n_outbytes - e->last_outbytes)<<4;
e->last_outbytes = n_outbytes;
e->outbps += ((long)rate - (long)e->outbps)>>2;
s->outbps = (e->outbps+0xF)>>5;
s->ustats.outbps = (e->outbps+0xF)>>5;
spin_unlock(&s->lock);
}
spin_unlock(&est_lock);
......@@ -108,20 +108,20 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
INIT_LIST_HEAD(&est->list);
est->last_conns = stats->conns;
est->cps = stats->cps<<10;
est->last_conns = stats->ustats.conns;
est->cps = stats->ustats.cps<<10;
est->last_inpkts = stats->inpkts;
est->inpps = stats->inpps<<10;
est->last_inpkts = stats->ustats.inpkts;
est->inpps = stats->ustats.inpps<<10;
est->last_outpkts = stats->outpkts;
est->outpps = stats->outpps<<10;
est->last_outpkts = stats->ustats.outpkts;
est->outpps = stats->ustats.outpps<<10;
est->last_inbytes = stats->inbytes;
est->inbps = stats->inbps<<5;
est->last_inbytes = stats->ustats.inbytes;
est->inbps = stats->ustats.inbps<<5;
est->last_outbytes = stats->outbytes;
est->outbps = stats->outbps<<5;
est->last_outbytes = stats->ustats.outbytes;
est->outbps = stats->ustats.outbps<<5;
spin_lock_bh(&est_lock);
list_add(&est->list, &est_list);
......
......@@ -140,13 +140,21 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct tcphdr *th;
char *data, *data_limit;
char *start, *end;
__be32 from;
union nf_inet_addr from;
__be16 port;
struct ip_vs_conn *n_cp;
char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
unsigned buf_len;
int ret;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
* so turn this into a no-op for IPv6 packets
*/
if (cp->af == AF_INET6)
return 1;
#endif
*diff = 0;
/* Only useful for established sessions */
......@@ -166,24 +174,25 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
sizeof(SERVER_STRING)-1, ')',
&from, &port,
&from.ip, &port,
&start, &end) != 1)
return 1;
IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> "
"%u.%u.%u.%u:%d detected\n",
NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
NIPQUAD(from.ip), ntohs(port),
NIPQUAD(cp->caddr.ip), 0);
/*
* Now update or create an connection entry for it
*/
n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
cp->caddr, 0);
n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port,
&cp->caddr, 0);
if (!n_cp) {
n_cp = ip_vs_conn_new(IPPROTO_TCP,
cp->caddr, 0,
cp->vaddr, port,
from, port,
n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
&cp->caddr, 0,
&cp->vaddr, port,
&from, port,
IP_VS_CONN_F_NO_CPORT,
cp->dest);
if (!n_cp)
......@@ -196,9 +205,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Replace the old passive address with the new one
*/
from = n_cp->vaddr;
from.ip = n_cp->vaddr.ip;
port = n_cp->vport;
sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
(ntohs(port)>>8)&255, ntohs(port)&255);
buf_len = strlen(buf);
......@@ -243,10 +252,18 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct tcphdr *th;
char *data, *data_start, *data_limit;
char *start, *end;
__be32 to;
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
* so turn this into a no-op for IPv6 packets
*/
if (cp->af == AF_INET6)
return 1;
#endif
/* no diff required for incoming packets */
*diff = 0;
......@@ -291,12 +308,12 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
if (ip_vs_ftp_get_addrport(data_start, data_limit,
CLIENT_STRING, sizeof(CLIENT_STRING)-1,
'\r', &to, &port,
'\r', &to.ip, &port,
&start, &end) != 1)
return 1;
IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n",
NIPQUAD(to), ntohs(port));
NIPQUAD(to.ip), ntohs(port));
/* Passive mode off */
cp->app_data = NULL;
......@@ -306,16 +323,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
ip_vs_proto_name(iph->protocol),
NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);
NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0);
n_cp = ip_vs_conn_in_get(iph->protocol,
to, port,
cp->vaddr, htons(ntohs(cp->vport)-1));
n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol,
&to, port,
&cp->vaddr, htons(ntohs(cp->vport)-1));
if (!n_cp) {
n_cp = ip_vs_conn_new(IPPROTO_TCP,
to, port,
cp->vaddr, htons(ntohs(cp->vport)-1),
cp->daddr, htons(ntohs(cp->dport)-1),
n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
&to, port,
&cp->vaddr, htons(ntohs(cp->vport)-1),
&cp->daddr, htons(ntohs(cp->dport)-1),
0,
cp->dest);
if (!n_cp)
......
......@@ -422,7 +422,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
NIPQUAD(least->addr), ntohs(least->port),
NIPQUAD(least->addr.ip), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
......@@ -506,7 +506,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
NIPQUAD(dest->addr.ip),
ntohs(dest->port));
return dest;
......@@ -522,6 +522,9 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 0,
#endif
.init_service = ip_vs_lblc_init_svc,
.done_service = ip_vs_lblc_done_svc,
.schedule = ip_vs_lblc_schedule,
......
......@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
NIPQUAD(least->addr), ntohs(least->port),
NIPQUAD(least->addr.ip), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
......@@ -250,7 +250,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
NIPQUAD(most->addr), ntohs(most->port),
NIPQUAD(most->addr.ip), ntohs(most->port),
atomic_read(&most->activeconns),
atomic_read(&most->refcnt),
atomic_read(&most->weight), moh);
......@@ -598,7 +598,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
NIPQUAD(least->addr), ntohs(least->port),
NIPQUAD(least->addr.ip), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
......@@ -706,7 +706,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
NIPQUAD(dest->addr.ip),
ntohs(dest->port));
return dest;
......@@ -722,6 +722,9 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 0,
#endif
.init_service = ip_vs_lblcr_init_svc,
.done_service = ip_vs_lblcr_done_svc,
.schedule = ip_vs_lblcr_schedule,
......
......@@ -67,10 +67,10 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
if (least)
IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n",
NIPQUAD(least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->inactconns));
IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n",
IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->inactconns));
return least;
}
......@@ -81,6 +81,9 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 1,
#endif
.schedule = ip_vs_lc_schedule,
};
......
......@@ -99,12 +99,12 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
return NULL;
out:
IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
NIPQUAD(least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
IP_VS_DBG_BUF(6, "NQ: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
}
......@@ -116,6 +116,9 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 1,
#endif
.schedule = ip_vs_nq_schedule,
};
......
......@@ -151,11 +151,11 @@ const char * ip_vs_state_name(__u16 proto, int state)
}
void
ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
const struct sk_buff *skb,
int offset,
const char *msg)
static void
ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
const struct sk_buff *skb,
int offset,
const char *msg)
{
char buf[128];
struct iphdr _iph, *ih;
......@@ -189,6 +189,61 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
#ifdef CONFIG_IP_VS_IPV6
static void
ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
const struct sk_buff *skb,
int offset,
const char *msg)
{
char buf[192];
struct ipv6hdr _iph, *ih;
ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
if (ih == NULL)
sprintf(buf, "%s TRUNCATED", pp->name);
else if (ih->nexthdr == IPPROTO_FRAGMENT)
sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag",
pp->name, NIP6(ih->saddr),
NIP6(ih->daddr));
else {
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
sizeof(_ports), _ports);
if (pptr == NULL)
sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT,
pp->name,
NIP6(ih->saddr),
NIP6(ih->daddr));
else
sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u",
pp->name,
NIP6(ih->saddr),
ntohs(pptr[0]),
NIP6(ih->daddr),
ntohs(pptr[1]));
}
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
#endif
void
ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
const struct sk_buff *skb,
int offset,
const char *msg)
{
#ifdef CONFIG_IP_VS_IPV6
if (skb->protocol == __constant_htons(ETH_P_IPV6))
ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
else
#endif
ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
}
int __init ip_vs_protocol_init(void)
{
......
......@@ -39,25 +39,23 @@ struct isakmp_hdr {
static struct ip_vs_conn *
ah_esp_conn_in_get(const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct iphdr *iph,
unsigned int proto_off,
ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
if (likely(!inverse)) {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->saddr,
cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
&iph->saddr,
htons(PORT_ISAKMP),
iph->daddr,
&iph->daddr,
htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->daddr,
cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
&iph->daddr,
htons(PORT_ISAKMP),
iph->saddr,
&iph->saddr,
htons(PORT_ISAKMP));
}
......@@ -66,12 +64,12 @@ ah_esp_conn_in_get(const struct sk_buff *skb,
* We are not sure if the packet is from our
* service, so our conn_schedule hook should return NF_ACCEPT
*/
IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
"%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
inverse ? "ICMP+" : "",
pp->name,
NIPQUAD(iph->saddr),
NIPQUAD(iph->daddr));
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
pp->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
return cp;
......@@ -79,32 +77,35 @@ ah_esp_conn_in_get(const struct sk_buff *skb,
static struct ip_vs_conn *
ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
if (likely(!inverse)) {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->saddr,
cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
&iph->saddr,
htons(PORT_ISAKMP),
iph->daddr,
&iph->daddr,
htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->daddr,
cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
&iph->daddr,
htons(PORT_ISAKMP),
iph->saddr,
&iph->saddr,
htons(PORT_ISAKMP));
}
if (!cp) {
IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
"%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
inverse ? "ICMP+" : "",
pp->name,
NIPQUAD(iph->saddr),
NIPQUAD(iph->daddr));
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
pp->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
return cp;
......@@ -112,8 +113,7 @@ ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
static int
ah_esp_conn_schedule(struct sk_buff *skb,
struct ip_vs_protocol *pp,
ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
/*
......@@ -125,8 +125,8 @@ ah_esp_conn_schedule(struct sk_buff *skb,
static void
ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
char buf[256];
struct iphdr _iph, *ih;
......@@ -142,6 +142,38 @@ ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
#ifdef CONFIG_IP_VS_IPV6
static void
ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
char buf[256];
struct ipv6hdr _iph, *ih;
ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
if (ih == NULL)
sprintf(buf, "%s TRUNCATED", pp->name);
else
sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT,
pp->name, NIP6(ih->saddr),
NIP6(ih->daddr));
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
#endif
static void
ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
#ifdef CONFIG_IP_VS_IPV6
if (skb->protocol == __constant_htons(ETH_P_IPV6))
ah_esp_debug_packet_v6(pp, skb, offset, msg);
else
#endif
ah_esp_debug_packet_v4(pp, skb, offset, msg);
}
static void ah_esp_init(struct ip_vs_protocol *pp)
{
......
......@@ -25,8 +25,9 @@
static struct ip_vs_conn *
tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
__be16 _ports[2], *pptr;
......@@ -35,19 +36,20 @@ tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
return NULL;
if (likely(!inverse)) {
return ip_vs_conn_in_get(iph->protocol,
iph->saddr, pptr[0],
iph->daddr, pptr[1]);
return ip_vs_conn_in_get(af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1]);
} else {
return ip_vs_conn_in_get(iph->protocol,
iph->daddr, pptr[1],
iph->saddr, pptr[0]);
return ip_vs_conn_in_get(af, iph->protocol,
&iph->daddr, pptr[1],
&iph->saddr, pptr[0]);
}
}
static struct ip_vs_conn *
tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
__be16 _ports[2], *pptr;
......@@ -56,34 +58,36 @@ tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
return NULL;
if (likely(!inverse)) {
return ip_vs_conn_out_get(iph->protocol,
iph->saddr, pptr[0],
iph->daddr, pptr[1]);
return ip_vs_conn_out_get(af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1]);
} else {
return ip_vs_conn_out_get(iph->protocol,
iph->daddr, pptr[1],
iph->saddr, pptr[0]);
return ip_vs_conn_out_get(af, iph->protocol,
&iph->daddr, pptr[1],
&iph->saddr, pptr[0]);
}
}
static int
tcp_conn_schedule(struct sk_buff *skb,
struct ip_vs_protocol *pp,
tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
struct ip_vs_iphdr iph;
th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
if (th == NULL) {
*verdict = NF_DROP;
return 0;
}
if (th->syn &&
(svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
ip_hdr(skb)->daddr, th->dest))) {
(svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
th->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
......@@ -110,22 +114,62 @@ tcp_conn_schedule(struct sk_buff *skb,
static inline void
tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
tcp_fast_csum_update(int af, struct tcphdr *tcph,
const union nf_inet_addr *oldip,
const union nf_inet_addr *newip,
__be16 oldport, __be16 newport)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
tcph->check =
csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
ip_vs_check_diff2(oldport, newport,
~csum_unfold(tcph->check))));
else
#endif
tcph->check =
csum_fold(ip_vs_check_diff4(oldip, newip,
csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
ip_vs_check_diff2(oldport, newport,
~csum_unfold(tcph->check))));
}
static inline void
tcp_partial_csum_update(int af, struct tcphdr *tcph,
const union nf_inet_addr *oldip,
const union nf_inet_addr *newip,
__be16 oldlen, __be16 newlen)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
tcph->check =
csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
ip_vs_check_diff2(oldlen, newlen,
~csum_unfold(tcph->check))));
else
#endif
tcph->check =
csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
ip_vs_check_diff2(oldlen, newlen,
~csum_unfold(tcph->check))));
}
static int
tcp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
const unsigned int tcphoff = ip_hdrlen(skb);
unsigned int tcphoff;
int oldlen;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
tcphoff = sizeof(struct ipv6hdr);
else
#endif
tcphoff = ip_hdrlen(skb);
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
......@@ -133,7 +177,7 @@ tcp_snat_handler(struct sk_buff *skb,
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(skb, pp))
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/* Call application helper if needed */
......@@ -141,13 +185,17 @@ tcp_snat_handler(struct sk_buff *skb,
return 0;
}
tcph = (void *)ip_hdr(skb) + tcphoff;
tcph = (void *)skb_network_header(skb) + tcphoff;
tcph->source = cp->vport;
/* Adjust TCP checksums */
if (!cp->app) {
if (skb->ip_summed == CHECKSUM_PARTIAL) {
tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
htonl(oldlen),
htonl(skb->len - tcphoff));
} else if (!cp->app) {
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
......@@ -155,9 +203,20 @@ tcp_snat_handler(struct sk_buff *skb,
/* full checksum calculation */
tcph->check = 0;
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
skb->len - tcphoff,
cp->protocol, skb->csum);
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
&cp->caddr.in6,
skb->len - tcphoff,
cp->protocol, skb->csum);
else
#endif
tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
cp->caddr.ip,
skb->len - tcphoff,
cp->protocol,
skb->csum);
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, tcph->check,
(char*)&(tcph->check) - (char*)tcph);
......@@ -171,7 +230,16 @@ tcp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
const unsigned int tcphoff = ip_hdrlen(skb);
unsigned int tcphoff;
int oldlen;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
tcphoff = sizeof(struct ipv6hdr);
else
#endif
tcphoff = ip_hdrlen(skb);
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
......@@ -179,7 +247,7 @@ tcp_dnat_handler(struct sk_buff *skb,
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(skb, pp))
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/*
......@@ -190,15 +258,19 @@ tcp_dnat_handler(struct sk_buff *skb,
return 0;
}
tcph = (void *)ip_hdr(skb) + tcphoff;
tcph = (void *)skb_network_header(skb) + tcphoff;
tcph->dest = cp->dport;
/*
* Adjust TCP checksums
*/
if (!cp->app) {
if (skb->ip_summed == CHECKSUM_PARTIAL) {
tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
htonl(oldlen),
htonl(skb->len - tcphoff));
} else if (!cp->app) {
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
......@@ -206,9 +278,19 @@ tcp_dnat_handler(struct sk_buff *skb,
/* full checksum calculation */
tcph->check = 0;
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
skb->len - tcphoff,
cp->protocol, skb->csum);
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
tcph->check = csum_ipv6_magic(&cp->caddr.in6,
&cp->daddr.in6,
skb->len - tcphoff,
cp->protocol, skb->csum);
else
#endif
tcph->check = csum_tcpudp_magic(cp->caddr.ip,
cp->daddr.ip,
skb->len - tcphoff,
cp->protocol,
skb->csum);
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
......@@ -216,21 +298,43 @@ tcp_dnat_handler(struct sk_buff *skb,
static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
const unsigned int tcphoff = ip_hdrlen(skb);
unsigned int tcphoff;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
tcphoff = sizeof(struct ipv6hdr);
else
#endif
tcphoff = ip_hdrlen(skb);
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
case CHECKSUM_COMPLETE:
if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
skb->len - tcphoff,
ip_hdr(skb)->protocol, skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
}
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len - tcphoff,
ipv6_hdr(skb)->nexthdr,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
}
} else
#endif
if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr,
skb->len - tcphoff,
ip_hdr(skb)->protocol,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
}
break;
default:
/* No need to checksum. */
......@@ -419,19 +523,23 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
if (new_state != cp->state) {
struct ip_vs_dest *dest = cp->dest;
IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
"%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
pp->name,
(state_off==TCP_DIR_OUTPUT)?"output ":"input ",
th->syn? 'S' : '.',
th->fin? 'F' : '.',
th->ack? 'A' : '.',
th->rst? 'R' : '.',
NIPQUAD(cp->daddr), ntohs(cp->dport),
NIPQUAD(cp->caddr), ntohs(cp->cport),
tcp_state_name(cp->state),
tcp_state_name(new_state),
atomic_read(&cp->refcnt));
IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
"%s:%d state: %s->%s conn->refcnt:%d\n",
pp->name,
((state_off == TCP_DIR_OUTPUT) ?
"output " : "input "),
th->syn ? 'S' : '.',
th->fin ? 'F' : '.',
th->ack ? 'A' : '.',
th->rst ? 'R' : '.',
IP_VS_DBG_ADDR(cp->af, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
tcp_state_name(cp->state),
tcp_state_name(new_state),
atomic_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(new_state != IP_VS_TCP_S_ESTABLISHED)) {
......@@ -461,7 +569,13 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
{
struct tcphdr _tcph, *th;
th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
#ifdef CONFIG_IP_VS_IPV6
int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
#else
int ihl = ip_hdrlen(skb);
#endif
th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
......@@ -546,12 +660,15 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
break;
spin_unlock(&tcp_app_lock);
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
"%u.%u.%u.%u:%u to app %s on port %u\n",
__func__,
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
inc->name, ntohs(inc->port));
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
__func__,
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
ntohs(cp->vport),
inc->name, ntohs(inc->port));
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
......
......@@ -24,8 +24,9 @@
#include <net/ip.h>
static struct ip_vs_conn *
udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
__be16 _ports[2], *pptr;
......@@ -35,13 +36,13 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
return NULL;
if (likely(!inverse)) {
cp = ip_vs_conn_in_get(iph->protocol,
iph->saddr, pptr[0],
iph->daddr, pptr[1]);
cp = ip_vs_conn_in_get(af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1]);
} else {
cp = ip_vs_conn_in_get(iph->protocol,
iph->daddr, pptr[1],
iph->saddr, pptr[0]);
cp = ip_vs_conn_in_get(af, iph->protocol,
&iph->daddr, pptr[1],
&iph->saddr, pptr[0]);
}
return cp;
......@@ -49,25 +50,25 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
static struct ip_vs_conn *
udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_ports), _ports);
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
if (likely(!inverse)) {
cp = ip_vs_conn_out_get(iph->protocol,
iph->saddr, pptr[0],
iph->daddr, pptr[1]);
cp = ip_vs_conn_out_get(af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1]);
} else {
cp = ip_vs_conn_out_get(iph->protocol,
iph->daddr, pptr[1],
iph->saddr, pptr[0]);
cp = ip_vs_conn_out_get(af, iph->protocol,
&iph->daddr, pptr[1],
&iph->saddr, pptr[0]);
}
return cp;
......@@ -75,21 +76,24 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
static int
udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
struct ip_vs_iphdr iph;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
uh = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_udph), &_udph);
uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
if (uh == NULL) {
*verdict = NF_DROP;
return 0;
}
if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
ip_hdr(skb)->daddr, uh->dest))) {
svc = ip_vs_service_get(af, skb->mark, iph.protocol,
&iph.daddr, uh->dest);
if (svc) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
......@@ -116,23 +120,63 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
static inline void
udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
udp_fast_csum_update(int af, struct udphdr *uhdr,
const union nf_inet_addr *oldip,
const union nf_inet_addr *newip,
__be16 oldport, __be16 newport)
{
uhdr->check =
csum_fold(ip_vs_check_diff4(oldip, newip,
ip_vs_check_diff2(oldport, newport,
~csum_unfold(uhdr->check))));
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
uhdr->check =
csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
ip_vs_check_diff2(oldport, newport,
~csum_unfold(uhdr->check))));
else
#endif
uhdr->check =
csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
ip_vs_check_diff2(oldport, newport,
~csum_unfold(uhdr->check))));
if (!uhdr->check)
uhdr->check = CSUM_MANGLED_0;
}
static inline void
udp_partial_csum_update(int af, struct udphdr *uhdr,
const union nf_inet_addr *oldip,
const union nf_inet_addr *newip,
__be16 oldlen, __be16 newlen)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
uhdr->check =
csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
ip_vs_check_diff2(oldlen, newlen,
~csum_unfold(uhdr->check))));
else
#endif
uhdr->check =
csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
ip_vs_check_diff2(oldlen, newlen,
~csum_unfold(uhdr->check))));
}
static int
udp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
const unsigned int udphoff = ip_hdrlen(skb);
unsigned int udphoff;
int oldlen;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
udphoff = sizeof(struct ipv6hdr);
else
#endif
udphoff = ip_hdrlen(skb);
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
......@@ -140,7 +184,7 @@ udp_snat_handler(struct sk_buff *skb,
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(skb, pp))
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/*
......@@ -150,15 +194,19 @@ udp_snat_handler(struct sk_buff *skb,
return 0;
}
udph = (void *)ip_hdr(skb) + udphoff;
udph = (void *)skb_network_header(skb) + udphoff;
udph->source = cp->vport;
/*
* Adjust UDP checksums
*/
if (!cp->app && (udph->check != 0)) {
if (skb->ip_summed == CHECKSUM_PARTIAL) {
udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
htonl(oldlen),
htonl(skb->len - udphoff));
} else if (!cp->app && (udph->check != 0)) {
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
......@@ -166,9 +214,19 @@ udp_snat_handler(struct sk_buff *skb,
/* full checksum calculation */
udph->check = 0;
skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
skb->len - udphoff,
cp->protocol, skb->csum);
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
udph->check = csum_ipv6_magic(&cp->vaddr.in6,
&cp->caddr.in6,
skb->len - udphoff,
cp->protocol, skb->csum);
else
#endif
udph->check = csum_tcpudp_magic(cp->vaddr.ip,
cp->caddr.ip,
skb->len - udphoff,
cp->protocol,
skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
......@@ -184,7 +242,16 @@ udp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
unsigned int udphoff = ip_hdrlen(skb);
unsigned int udphoff;
int oldlen;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
udphoff = sizeof(struct ipv6hdr);
else
#endif
udphoff = ip_hdrlen(skb);
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
......@@ -192,7 +259,7 @@ udp_dnat_handler(struct sk_buff *skb,
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
if (pp->csum_check && !pp->csum_check(skb, pp))
if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
return 0;
/*
......@@ -203,15 +270,19 @@ udp_dnat_handler(struct sk_buff *skb,
return 0;
}
udph = (void *)ip_hdr(skb) + udphoff;
udph = (void *)skb_network_header(skb) + udphoff;
udph->dest = cp->dport;
/*
* Adjust UDP checksums
*/
if (!cp->app && (udph->check != 0)) {
if (skb->ip_summed == CHECKSUM_PARTIAL) {
udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
htonl(oldlen),
htonl(skb->len - udphoff));
} else if (!cp->app && (udph->check != 0)) {
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
......@@ -219,9 +290,19 @@ udp_dnat_handler(struct sk_buff *skb,
/* full checksum calculation */
udph->check = 0;
skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
skb->len - udphoff,
cp->protocol, skb->csum);
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
udph->check = csum_ipv6_magic(&cp->caddr.in6,
&cp->daddr.in6,
skb->len - udphoff,
cp->protocol, skb->csum);
else
#endif
udph->check = csum_tcpudp_magic(cp->caddr.ip,
cp->daddr.ip,
skb->len - udphoff,
cp->protocol,
skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
skb->ip_summed = CHECKSUM_UNNECESSARY;
......@@ -231,10 +312,17 @@ udp_dnat_handler(struct sk_buff *skb,
static int
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
struct udphdr _udph, *uh;
const unsigned int udphoff = ip_hdrlen(skb);
unsigned int udphoff;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
udphoff = sizeof(struct ipv6hdr);
else
#endif
udphoff = ip_hdrlen(skb);
uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
if (uh == NULL)
......@@ -246,15 +334,28 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
case CHECKSUM_COMPLETE:
if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr,
skb->len - udphoff,
ip_hdr(skb)->protocol,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
}
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len - udphoff,
ipv6_hdr(skb)->nexthdr,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
}
} else
#endif
if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr,
skb->len - udphoff,
ip_hdr(skb)->protocol,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
}
break;
default:
/* No need to checksum. */
......@@ -340,12 +441,15 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
break;
spin_unlock(&udp_app_lock);
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
"%u.%u.%u.%u:%u to app %s on port %u\n",
__func__,
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
inc->name, ntohs(inc->port));
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
__func__,
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
ntohs(cp->vport),
inc->name, ntohs(inc->port));
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
......
......@@ -74,11 +74,11 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
out:
svc->sched_data = q;
write_unlock(&svc->sched_lock);
IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u "
"activeconns %d refcnt %d weight %d\n",
NIPQUAD(dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt), atomic_read(&dest->weight));
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt), atomic_read(&dest->weight));
return dest;
}
......@@ -89,6 +89,9 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 1,
#endif
.init_service = ip_vs_rr_init_svc,
.update_service = ip_vs_rr_update_svc,
.schedule = ip_vs_rr_schedule,
......
......@@ -101,12 +101,12 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
}
IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
NIPQUAD(least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
IP_VS_DBG_BUF(6, "SED: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
}
......@@ -118,6 +118,9 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 1,
#endif
.schedule = ip_vs_sed_schedule,
};
......
......@@ -215,7 +215,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(iph->saddr),
NIPQUAD(dest->addr),
NIPQUAD(dest->addr.ip),
ntohs(dest->port));
return dest;
......@@ -231,6 +231,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 0,
#endif
.init_service = ip_vs_sh_init_svc,
.done_service = ip_vs_sh_done_svc,
.update_service = ip_vs_sh_update_svc,
......
......@@ -256,9 +256,9 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
s->cport = cp->cport;
s->vport = cp->vport;
s->dport = cp->dport;
s->caddr = cp->caddr;
s->vaddr = cp->vaddr;
s->daddr = cp->daddr;
s->caddr = cp->caddr.ip;
s->vaddr = cp->vaddr.ip;
s->daddr = cp->daddr.ip;
s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
s->state = htons(cp->state);
if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
......@@ -366,21 +366,28 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
}
if (!(flags & IP_VS_CONN_F_TEMPLATE))
cp = ip_vs_conn_in_get(s->protocol,
s->caddr, s->cport,
s->vaddr, s->vport);
cp = ip_vs_conn_in_get(AF_INET, s->protocol,
(union nf_inet_addr *)&s->caddr,
s->cport,
(union nf_inet_addr *)&s->vaddr,
s->vport);
else
cp = ip_vs_ct_in_get(s->protocol,
s->caddr, s->cport,
s->vaddr, s->vport);
cp = ip_vs_ct_in_get(AF_INET, s->protocol,
(union nf_inet_addr *)&s->caddr,
s->cport,
(union nf_inet_addr *)&s->vaddr,
s->vport);
if (!cp) {
/*
* Find the appropriate destination for the connection.
* If it is not found the connection will remain unbound
* but still handled.
*/
dest = ip_vs_find_dest(s->daddr, s->dport,
s->vaddr, s->vport,
dest = ip_vs_find_dest(AF_INET,
(union nf_inet_addr *)&s->daddr,
s->dport,
(union nf_inet_addr *)&s->vaddr,
s->vport,
s->protocol);
/* Set the approprite ativity flag */
if (s->protocol == IPPROTO_TCP) {
......@@ -389,10 +396,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
else
flags &= ~IP_VS_CONN_F_INACTIVE;
}
cp = ip_vs_conn_new(s->protocol,
s->caddr, s->cport,
s->vaddr, s->vport,
s->daddr, s->dport,
cp = ip_vs_conn_new(AF_INET, s->protocol,
(union nf_inet_addr *)&s->caddr,
s->cport,
(union nf_inet_addr *)&s->vaddr,
s->vport,
(union nf_inet_addr *)&s->daddr,
s->dport,
flags, dest);
if (dest)
atomic_dec(&dest->refcnt);
......
......@@ -89,12 +89,12 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
}
IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
NIPQUAD(least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
IP_VS_DBG_BUF(6, "WLC: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
}
......@@ -106,6 +106,9 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 1,
#endif
.schedule = ip_vs_wlc_schedule,
};
......
......@@ -195,12 +195,12 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
}
IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
"activeconns %d refcnt %d weight %d\n",
NIPQUAD(dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
IP_VS_DBG_BUF(6, "WRR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
out:
write_unlock(&svc->sched_lock);
......@@ -213,6 +213,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
.supports_ipv6 = 1,
#endif
.init_service = ip_vs_wrr_init_svc,
.done_service = ip_vs_wrr_done_svc,
.update_service = ip_vs_wrr_update_svc,
......
......@@ -20,6 +20,9 @@
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
#include <net/route.h> /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
......@@ -47,7 +50,8 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
if (!dst)
return NULL;
if ((dst->obsolete || rtos != dest->dst_rtos) &&
if ((dst->obsolete
|| (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
dst->ops->check(dst, cookie) == NULL) {
dest->dst_cache = NULL;
dst_release(dst);
......@@ -71,7 +75,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
.oif = 0,
.nl_u = {
.ip4_u = {
.daddr = dest->addr,
.daddr = dest->addr.ip,
.saddr = 0,
.tos = rtos, } },
};
......@@ -80,12 +84,12 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
spin_unlock(&dest->dst_lock);
IP_VS_DBG_RL("ip_route_output error, "
"dest: %u.%u.%u.%u\n",
NIPQUAD(dest->addr));
NIPQUAD(dest->addr.ip));
return NULL;
}
__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
NIPQUAD(dest->addr),
NIPQUAD(dest->addr.ip),
atomic_read(&rt->u.dst.__refcnt), rtos);
}
spin_unlock(&dest->dst_lock);
......@@ -94,14 +98,14 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
.oif = 0,
.nl_u = {
.ip4_u = {
.daddr = cp->daddr,
.daddr = cp->daddr.ip,
.saddr = 0,
.tos = rtos, } },
};
if (ip_route_output_key(&init_net, &rt, &fl)) {
IP_VS_DBG_RL("ip_route_output error, dest: "
"%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
"%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip));
return NULL;
}
}
......@@ -109,6 +113,70 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
return rt;
}
#ifdef CONFIG_IP_VS_IPV6
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
{
struct rt6_info *rt; /* Route to the other host */
struct ip_vs_dest *dest = cp->dest;
if (dest) {
spin_lock(&dest->dst_lock);
rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
if (!rt) {
struct flowi fl = {
.oif = 0,
.nl_u = {
.ip6_u = {
.daddr = dest->addr.in6,
.saddr = {
.s6_addr32 =
{ 0, 0, 0, 0 },
},
},
},
};
rt = (struct rt6_info *)ip6_route_output(&init_net,
NULL, &fl);
if (!rt) {
spin_unlock(&dest->dst_lock);
IP_VS_DBG_RL("ip6_route_output error, "
"dest: " NIP6_FMT "\n",
NIP6(dest->addr.in6));
return NULL;
}
__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n",
NIP6(dest->addr.in6),
atomic_read(&rt->u.dst.__refcnt));
}
spin_unlock(&dest->dst_lock);
} else {
struct flowi fl = {
.oif = 0,
.nl_u = {
.ip6_u = {
.daddr = cp->daddr.in6,
.saddr = {
.s6_addr32 = { 0, 0, 0, 0 },
},
},
},
};
rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
if (!rt) {
IP_VS_DBG_RL("ip6_route_output error, dest: "
NIP6_FMT "\n", NIP6(cp->daddr.in6));
return NULL;
}
}
return rt;
}
#endif
/*
* Release dest->dst_cache before a dest is removed
......@@ -123,11 +191,11 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
dst_release(old_dst);
}
#define IP_VS_XMIT(skb, rt) \
#define IP_VS_XMIT(pf, skb, rt) \
do { \
(skb)->ipvs_property = 1; \
skb_forward_csum(skb); \
NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \
NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
(rt)->u.dst.dev, dst_output); \
} while (0)
......@@ -200,7 +268,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
IP_VS_XMIT(skb, rt);
IP_VS_XMIT(PF_INET, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
......@@ -213,6 +281,70 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
struct rt6_info *rt; /* Route to the other host */
struct ipv6hdr *iph = ipv6_hdr(skb);
int mtu;
struct flowi fl = {
.oif = 0,
.nl_u = {
.ip6_u = {
.daddr = iph->daddr,
.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
};
EnterFunction(10);
rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
if (!rt) {
IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, "
"dest: " NIP6_FMT "\n", NIP6(iph->daddr));
goto tx_error_icmp;
}
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
if (skb->len > mtu) {
dst_release(&rt->u.dst);
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n");
goto tx_error;
}
/*
* Call ip_send_check because we are not sure it is called
* after ip_defrag. Is copy-on-write needed?
*/
skb = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(skb == NULL)) {
dst_release(&rt->u.dst);
return NF_STOLEN;
}
/* drop old route */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
IP_VS_XMIT(PF_INET6, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
}
#endif
/*
* NAT transmitter (only for outside-to-inside nat forwarding)
......@@ -264,7 +396,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error;
ip_hdr(skb)->daddr = cp->daddr;
ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb));
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
......@@ -276,7 +408,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
IP_VS_XMIT(skb, rt);
IP_VS_XMIT(PF_INET, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
......@@ -292,6 +424,83 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
struct rt6_info *rt; /* Route to the other host */
int mtu;
EnterFunction(10);
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
ip_vs_conn_fill_cport(cp, *p);
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
rt = __ip_vs_get_out_rt_v6(cp);
if (!rt)
goto tx_error_icmp;
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
if (skb->len > mtu) {
dst_release(&rt->u.dst);
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"ip_vs_nat_xmit_v6(): frag needed for");
goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
goto tx_error_put;
if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
goto tx_error_put;
/* drop old route */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6;
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
MTU problem. */
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
IP_VS_XMIT(PF_INET6, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
LeaveFunction(10);
kfree_skb(skb);
return NF_STOLEN;
tx_error_put:
dst_release(&rt->u.dst);
goto tx_error;
}
#endif
/*
* IP Tunneling transmitter
......@@ -423,6 +632,112 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
struct rt6_info *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *old_iph = ipv6_hdr(skb);
sk_buff_data_t old_transport_header = skb->transport_header;
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int mtu;
EnterFunction(10);
if (skb->protocol != htons(ETH_P_IPV6)) {
IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, "
"ETH_P_IPV6: %d, skb protocol: %d\n",
htons(ETH_P_IPV6), skb->protocol);
goto tx_error;
}
rt = __ip_vs_get_out_rt_v6(cp);
if (!rt)
goto tx_error_icmp;
tdev = rt->u.dst.dev;
mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
/* TODO IPv6: do we need this check in IPv6? */
if (mtu < 1280) {
dst_release(&rt->u.dst);
IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
goto tx_error;
}
if (skb->dst)
skb->dst->ops->update_pmtu(skb->dst, mtu);
if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
dst_release(&rt->u.dst);
IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n");
goto tx_error;
}
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
if (skb_headroom(skb) < max_headroom
|| skb_cloned(skb) || skb_shared(skb)) {
struct sk_buff *new_skb =
skb_realloc_headroom(skb, max_headroom);
if (!new_skb) {
dst_release(&rt->u.dst);
kfree_skb(skb);
IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n");
return NF_STOLEN;
}
kfree_skb(skb);
skb = new_skb;
old_iph = ipv6_hdr(skb);
}
skb->transport_header = old_transport_header;
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
/* drop old route */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
/*
* Push down and install the IPIP header.
*/
iph = ipv6_hdr(skb);
iph->version = 6;
iph->nexthdr = IPPROTO_IPV6;
iph->payload_len = old_iph->payload_len + sizeof(old_iph);
iph->priority = old_iph->priority;
memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
iph->daddr = rt->rt6i_dst.addr;
iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */
iph->hop_limit = old_iph->hop_limit;
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
ip6_local_out(skb);
LeaveFunction(10);
return NF_STOLEN;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
}
#endif
/*
* Direct Routing transmitter
......@@ -467,7 +782,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
IP_VS_XMIT(skb, rt);
IP_VS_XMIT(PF_INET, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
......@@ -480,6 +795,60 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
return NF_STOLEN;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
struct rt6_info *rt; /* Route to the other host */
int mtu;
EnterFunction(10);
rt = __ip_vs_get_out_rt_v6(cp);
if (!rt)
goto tx_error_icmp;
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
if (skb->len > mtu) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
dst_release(&rt->u.dst);
IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n");
goto tx_error;
}
/*
* Call ip_send_check because we are not sure it is called
* after ip_defrag. Is copy-on-write needed?
*/
skb = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(skb == NULL)) {
dst_release(&rt->u.dst);
return NF_STOLEN;
}
/* drop old route */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
IP_VS_XMIT(PF_INET6, skb, rt);
LeaveFunction(10);
return NF_STOLEN;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
kfree_skb(skb);
LeaveFunction(10);
return NF_STOLEN;
}
#endif
/*
* ICMP packet transmitter
......@@ -540,7 +909,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
IP_VS_XMIT(skb, rt);
IP_VS_XMIT(PF_INET, skb, rt);
rc = NF_STOLEN;
goto out;
......@@ -557,3 +926,79 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_rt_put(rt);
goto tx_error;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, int offset)
{
struct rt6_info *rt; /* Route to the other host */
int mtu;
int rc;
EnterFunction(10);
/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
forwarded directly here, because there is no need to
translate address/port back */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
if (cp->packet_xmit)
rc = cp->packet_xmit(skb, cp, pp);
else
rc = NF_ACCEPT;
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
goto out;
}
/*
* mangle and send the packet here (only for VS/NAT)
*/
rt = __ip_vs_get_out_rt_v6(cp);
if (!rt)
goto tx_error_icmp;
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
if (skb->len > mtu) {
dst_release(&rt->u.dst);
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, offset))
goto tx_error_put;
if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
goto tx_error_put;
/* drop the old route when skb is not shared */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
ip_vs_nat_icmp_v6(skb, pp, cp, 0);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
IP_VS_XMIT(PF_INET6, skb, rt);
rc = NF_STOLEN;
goto out;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
dev_kfree_skb(skb);
rc = NF_STOLEN;
out:
LeaveFunction(10);
return rc;
tx_error_put:
dst_release(&rt->u.dst);
goto tx_error;
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment