Commit 86ca984c authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:
 "Mostly netfilter bug fixes in here, but we have some bits elsewhere as
  well.

   1) Don't do SNAT replies for non-NATed connections in IPVS, from
      Julian Anastasov.

   2) Don't delete conntrack helpers while they are still in use, from
      Liping Zhang.

   3) Fix zero padding in xtables's xt_data_to_user(), from Willem de
      Bruijn.

   4) Add proper RCU protection to nf_tables_dump_set() because we
      cannot guarantee that we hold the NFNL_SUBSYS_NFTABLES lock. From
      Liping Zhang.

   5) Initialize rcv_mss in tcp_disconnect(), from Wei Wang.

   6) smsc95xx devices can't handle IPV6 checksums fully, so don't
      advertise support for offloading them. From Nisar Sayed.

   7) Fix out-of-bounds access in __ip6_append_data(), from Eric
      Dumazet.

   8) Make atl2_probe() propagate the error code properly on failures,
      from Alexey Khoroshilov.

   9) arp_target[] in bond_check_params() is used uninitialized. This
      got changes from a global static to a local variable, which is how
      this mistake happened. Fix from Jarod Wilson.

  10) Fix fallout from unnecessary NULL check removal in cls_matchall,
      from Jiri Pirko. This is definitely brown paper bag territory..."

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (26 commits)
  net: sched: cls_matchall: fix null pointer dereference
  vsock: use new wait API for vsock_stream_sendmsg()
  bonding: fix randomly populated arp target array
  net: Make IP alignment calulations clearer.
  bonding: fix accounting of active ports in 3ad
  net: atheros: atl2: don't return zero on failure path in atl2_probe()
  ipv6: fix out of bound writes in __ip6_append_data()
  bridge: start hello_timer when enabling KERNEL_STP in br_stp_start
  smsc95xx: Support only IPv4 TCP/UDP csum offload
  arp: always override existing neigh entries with gratuitous ARP
  arp: postpone addr_type calculation to as late as possible
  arp: decompose is_garp logic into a separate function
  arp: fixed error in a comment
  tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0
  netfilter: xtables: fix build failure from COMPAT_XT_ALIGN outside CONFIG_COMPAT
  ebtables: arpreply: Add the standard target sanity check
  netfilter: nf_tables: revisit chain/object refcounting from elements
  netfilter: nf_tables: missing sanitization in data from userspace
  netfilter: nf_tables: can't assume lock is acquired when dumping set elems
  netfilter: synproxy: fix conntrackd interaction
  ...
parents 08332893 2d76b2f8
......@@ -2577,7 +2577,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
return -1;
ad_info->aggregator_id = aggregator->aggregator_identifier;
ad_info->ports = aggregator->num_of_ports;
ad_info->ports = __agg_active_ports(aggregator);
ad_info->actor_key = aggregator->actor_oper_aggregator_key;
ad_info->partner_key = aggregator->partner_oper_aggregator_key;
ether_addr_copy(ad_info->partner_system,
......
......@@ -4271,10 +4271,10 @@ static int bond_check_params(struct bond_params *params)
int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
struct bond_opt_value newval;
const struct bond_opt_value *valptr;
int arp_all_targets_value;
int arp_all_targets_value = 0;
u16 ad_actor_sys_prio = 0;
u16 ad_user_port_key = 0;
__be32 arp_target[BOND_MAX_ARP_TARGETS];
__be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };
int arp_ip_count;
int bond_mode = BOND_MODE_ROUNDROBIN;
int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
......@@ -4501,7 +4501,6 @@ static int bond_check_params(struct bond_params *params)
arp_validate_value = 0;
}
arp_all_targets_value = 0;
if (arp_all_targets) {
bond_opt_initstr(&newval, arp_all_targets);
valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
......
......@@ -1353,6 +1353,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) &&
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n");
err = -EIO;
goto err_dma;
}
......@@ -1366,10 +1367,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* pcibios_set_master to do the needed arch specific settings */
pci_set_master(pdev);
err = -ENOMEM;
netdev = alloc_etherdev(sizeof(struct atl2_adapter));
if (!netdev)
if (!netdev) {
err = -ENOMEM;
goto err_alloc_etherdev;
}
SET_NETDEV_DEV(netdev, &pdev->dev);
......@@ -1408,8 +1410,6 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_sw_init;
err = -EIO;
netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX;
netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
......
......@@ -681,7 +681,7 @@ static int smsc95xx_set_features(struct net_device *netdev,
if (ret < 0)
return ret;
if (features & NETIF_F_HW_CSUM)
if (features & NETIF_F_IP_CSUM)
read_buf |= Tx_COE_EN_;
else
read_buf &= ~Tx_COE_EN_;
......@@ -1279,12 +1279,19 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
spin_lock_init(&pdata->mac_cr_lock);
/* LAN95xx devices do not alter the computed checksum of 0 to 0xffff.
* RFC 2460, ipv6 UDP calculated checksum yields a result of zero must
* be changed to 0xffff. RFC 768, ipv4 UDP computed checksum is zero,
* it is transmitted as all ones. The zero transmitted checksum means
* transmitter generated no checksum. Hence, enable csum offload only
* for ipv4 packets.
*/
if (DEFAULT_TX_CSUM_ENABLE)
dev->net->features |= NETIF_F_HW_CSUM;
dev->net->features |= NETIF_F_IP_CSUM;
if (DEFAULT_RX_CSUM_ENABLE)
dev->net->features |= NETIF_F_RXCSUM;
dev->net->hw_features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
smsc95xx_init_mac_address(dev);
......
......@@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m,
int xt_target_to_user(const struct xt_entry_target *t,
struct xt_entry_target __user *u);
int xt_data_to_user(void __user *dst, const void *src,
int usersize, int size);
int usersize, int size, int aligned_size);
void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
struct xt_counters_info *info, bool compat);
......
......@@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
/* True if the target is not a standard target */
#define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
static inline bool ebt_invalid_target(int target)
{
return (target < -NUM_STANDARD_TARGETS || target >= 0);
}
#endif
......@@ -9,6 +9,7 @@
#ifndef _NF_CONNTRACK_HELPER_H
#define _NF_CONNTRACK_HELPER_H
#include <linux/refcount.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_expect.h>
......@@ -26,6 +27,7 @@ struct nf_conntrack_helper {
struct hlist_node hnode; /* Internal use. */
char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */
refcount_t refcnt;
struct module *me; /* pointer to self */
const struct nf_conntrack_expect_policy *expect_policy;
......@@ -79,6 +81,8 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name,
u16 l3num,
u8 protonum);
void nf_conntrack_helper_put(struct nf_conntrack_helper *helper);
void nf_ct_helper_init(struct nf_conntrack_helper *helper,
u16 l3num, u16 protonum, const char *name,
u16 default_port, u16 spec_port, u32 id,
......
......@@ -176,7 +176,7 @@ struct nft_data_desc {
int nft_data_init(const struct nft_ctx *ctx,
struct nft_data *data, unsigned int size,
struct nft_data_desc *desc, const struct nlattr *nla);
void nft_data_uninit(const struct nft_data *data, enum nft_data_types type);
void nft_data_release(const struct nft_data *data, enum nft_data_types type);
int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
enum nft_data_types type, unsigned int len);
......
......@@ -808,11 +808,15 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
reg_off += reg->aux_off;
}
/* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking
* we force this to 2 which is universally what architectures use
* when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
*/
ip_align = strict ? 2 : NET_IP_ALIGN;
/* For platforms that do not have a Kconfig enabling
* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
* NET_IP_ALIGN is universally set to '2'. And on platforms
* that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
* to this code only in strict mode where we want to emulate
* the NET_IP_ALIGN==2 checking. Therefore use an
* unconditional IP align value of '2'.
*/
ip_align = 2;
if ((ip_align + reg_off + off) % size != 0) {
verbose("misaligned packet access off %d+%d+%d size %d\n",
ip_align, reg_off, off, size);
......
......@@ -179,6 +179,7 @@ static void br_stp_start(struct net_bridge *br)
br_debug(br, "using kernel STP\n");
/* To start timers on any ports left in blocking */
mod_timer(&br->hello_timer, jiffies + br->hello_time);
br_port_state_selection(br);
}
......
......@@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg)
if (br->dev->flags & IFF_UP) {
br_config_bpdu_generation(br);
if (br->stp_enabled != BR_USER_STP)
if (br->stp_enabled == BR_KERNEL_STP)
mod_timer(&br->hello_timer,
round_jiffies(jiffies + br->hello_time));
}
......
......@@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
if (e->ethproto != htons(ETH_P_ARP) ||
e->invflags & EBT_IPROTO)
return -EINVAL;
if (ebt_invalid_target(info->target))
return -EINVAL;
return 0;
}
......
......@@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name,
strlcpy(name, _name, sizeof(name));
if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
xt_data_to_user(um + entrysize, data, usersize, datasize))
xt_data_to_user(um + entrysize, data, usersize, datasize,
XT_ALIGN(datasize)))
return -EFAULT;
return 0;
......@@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
if (match->compat_to_user(cm->data, m->data))
return -EFAULT;
} else {
if (xt_data_to_user(cm->data, m->data, match->usersize, msize))
if (xt_data_to_user(cm->data, m->data, match->usersize, msize,
COMPAT_XT_ALIGN(msize)))
return -EFAULT;
}
......@@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
if (target->compat_to_user(cm->data, t->data))
return -EFAULT;
} else {
if (xt_data_to_user(cm->data, t->data, target->usersize, tsize))
if (xt_data_to_user(cm->data, t->data, target->usersize, tsize,
COMPAT_XT_ALIGN(tsize)))
return -EFAULT;
}
......
......@@ -641,6 +641,32 @@ void arp_xmit(struct sk_buff *skb)
}
EXPORT_SYMBOL(arp_xmit);
static bool arp_is_garp(struct net *net, struct net_device *dev,
int *addr_type, __be16 ar_op,
__be32 sip, __be32 tip,
unsigned char *sha, unsigned char *tha)
{
bool is_garp = tip == sip;
/* Gratuitous ARP _replies_ also require target hwaddr to be
* the same as source.
*/
if (is_garp && ar_op == htons(ARPOP_REPLY))
is_garp =
/* IPv4 over IEEE 1394 doesn't provide target
* hardware address field in its ARP payload.
*/
tha &&
!memcmp(tha, sha, dev->addr_len);
if (is_garp) {
*addr_type = inet_addr_type_dev_table(net, dev, sip);
if (*addr_type != RTN_UNICAST)
is_garp = false;
}
return is_garp;
}
/*
* Process an arp request.
*/
......@@ -837,29 +863,25 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
if (IN_DEV_ARP_ACCEPT(in_dev)) {
unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip);
if (n || IN_DEV_ARP_ACCEPT(in_dev)) {
addr_type = -1;
is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
sip, tip, sha, tha);
}
if (IN_DEV_ARP_ACCEPT(in_dev)) {
/* Unsolicited ARP is not accepted by default.
It is possible, that this option should be enabled for some
devices (strip is candidate)
*/
is_garp = tip == sip && addr_type == RTN_UNICAST;
/* Unsolicited ARP _replies_ also require target hwaddr to be
* the same as source.
*/
if (is_garp && arp->ar_op == htons(ARPOP_REPLY))
is_garp =
/* IPv4 over IEEE 1394 doesn't provide target
* hardware address field in its ARP payload.
*/
tha &&
!memcmp(tha, sha, dev->addr_len);
if (!n &&
((arp->ar_op == htons(ARPOP_REPLY) &&
addr_type == RTN_UNICAST) || is_garp))
(is_garp ||
(arp->ar_op == htons(ARPOP_REPLY) &&
(addr_type == RTN_UNICAST ||
(addr_type < 0 &&
/* postpone calculation to as late as possible */
inet_addr_type_dev_table(net, dev, sip) ==
RTN_UNICAST)))))
n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
}
......
......@@ -2320,6 +2320,10 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
* issue in __tcp_select_window()
*/
icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
......
......@@ -1466,6 +1466,11 @@ static int __ip6_append_data(struct sock *sk,
*/
alloclen += sizeof(struct frag_hdr);
copy = datalen - transhdrlen - fraggap;
if (copy < 0) {
err = -EINVAL;
goto error;
}
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len,
......@@ -1515,13 +1520,9 @@ static int __ip6_append_data(struct sock *sk,
data += fraggap;
pskb_trim_unique(skb_prev, maxfraglen);
}
copy = datalen - transhdrlen - fraggap;
if (copy < 0) {
err = -EINVAL;
kfree_skb(skb);
goto error;
} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
if (copy > 0 &&
getfrag(from, data + transhdrlen, offset,
copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
......
......@@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
{
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
pr_err("shouldn't reach here, because the box is on the "
"half connection in the tun/dr module.\n");
}
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
goto ignore_cp;
/* Ensure the checksum is correct */
if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
......@@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 0);
ignore_cp:
verdict = NF_ACCEPT;
out:
......@@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
*/
cp = pp->conn_out_get(ipvs, af, skb, &iph);
if (likely(cp))
if (likely(cp)) {
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
goto ignore_cp;
return handle_response(af, skb, pd, cp, &iph, hooknum);
}
/* Check for real-server-started requests */
if (atomic_read(&ipvs->conn_out_counter)) {
......@@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
}
}
}
out:
IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
"ip_vs_out: packet continues traversal as normal");
return NF_ACCEPT;
ignore_cp:
__ip_vs_conn_put(cp);
goto out;
}
/*
......
......@@ -174,6 +174,10 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
#endif
if (h != NULL && !try_module_get(h->me))
h = NULL;
if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) {
module_put(h->me);
h = NULL;
}
rcu_read_unlock();
......@@ -181,6 +185,13 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
{
refcount_dec(&helper->refcnt);
module_put(helper->me);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
struct nf_conn_help *
nf_ct_helper_ext_add(struct nf_conn *ct,
struct nf_conntrack_helper *helper, gfp_t gfp)
......@@ -417,6 +428,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
}
}
}
refcount_set(&me->refcnt, 1);
hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
nf_ct_helper_count++;
out:
......
......@@ -45,6 +45,8 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l4proto.h>
......@@ -1007,9 +1009,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
static int
ctnetlink_parse_tuple(const struct nlattr * const cda[],
struct nf_conntrack_tuple *tuple,
enum ctattr_type type, u_int8_t l3num,
struct nf_conntrack_zone *zone)
struct nf_conntrack_tuple *tuple, u32 type,
u_int8_t l3num, struct nf_conntrack_zone *zone)
{
struct nlattr *tb[CTA_TUPLE_MAX+1];
int err;
......@@ -1828,6 +1829,8 @@ ctnetlink_create_conntrack(struct net *net,
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
nf_ct_labels_ext_add(ct);
nfct_seqadj_ext_add(ct);
nfct_synproxy_ext_add(ct);
/* we must add conntrack extensions before confirmation. */
ct->status |= IPS_CONFIRMED;
......@@ -2447,7 +2450,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = {
static int ctnetlink_exp_dump_tuple(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
enum ctattr_expect type)
u32 type)
{
struct nlattr *nest_parms;
......
......@@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct,
{
struct nf_conntrack_tuple curr_tuple, new_tuple;
/* Can't setup nat info for confirmed ct. */
if (nf_ct_is_confirmed(ct))
return NF_ACCEPT;
NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
maniptype == NF_NAT_MANIP_DST);
BUG_ON(nf_nat_initialized(ct, maniptype));
......
......@@ -3367,35 +3367,50 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
return nf_tables_fill_setelem(args->skb, set, elem);
}
struct nft_set_dump_ctx {
const struct nft_set *set;
struct nft_ctx ctx;
};
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nft_set_dump_ctx *dump_ctx = cb->data;
struct net *net = sock_net(skb->sk);
u8 genmask = nft_genmask_cur(net);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
bool set_found = false;
struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct nlattr *nest;
u32 portid, seq;
int event, err;
int event;
err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy,
NULL);
if (err < 0)
return err;
rcu_read_lock();
list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (afi != dump_ctx->ctx.afi)
continue;
err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
(void *)nla, genmask);
if (err < 0)
return err;
list_for_each_entry_rcu(table, &afi->tables, list) {
if (table != dump_ctx->ctx.table)
continue;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
genmask);
if (IS_ERR(set))
return PTR_ERR(set);
list_for_each_entry_rcu(set, &table->sets, list) {
if (set == dump_ctx->set) {
set_found = true;
break;
}
}
break;
}
break;
}
if (!set_found) {
rcu_read_unlock();
return -ENOENT;
}
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM);
portid = NETLINK_CB(cb->skb).portid;
......@@ -3407,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx.afi->family;
nfmsg->nfgen_family = afi->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff);
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
goto nla_put_failure;
......@@ -3422,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
args.cb = cb;
args.skb = skb;
args.iter.genmask = nft_genmask_cur(ctx.net);
args.iter.genmask = nft_genmask_cur(net);
args.iter.skip = cb->args[0];
args.iter.count = 0;
args.iter.err = 0;
args.iter.fn = nf_tables_dump_setelem;
set->ops->walk(&ctx, set, &args.iter);
set->ops->walk(&dump_ctx->ctx, set, &args.iter);
rcu_read_unlock();
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
......@@ -3441,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
nla_put_failure:
rcu_read_unlock();
return -ENOSPC;
}
static int nf_tables_dump_set_done(struct netlink_callback *cb)
{
kfree(cb->data);
return 0;
}
static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
......@@ -3465,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_set,
.done = nf_tables_dump_set_done,
};
struct nft_set_dump_ctx *dump_ctx;
dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
if (!dump_ctx)
return -ENOMEM;
dump_ctx->set = set;
dump_ctx->ctx = ctx;
c.data = dump_ctx;
return netlink_dump_start(nlsk, skb, nlh, &c);
}
return -EOPNOTSUPP;
......@@ -3593,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_uninit(nft_set_ext_data(ext), set->dtype);
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
......@@ -3604,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
/* Only called from commit path, nft_set_elem_deactivate() already deals with
* the refcounting from the preparation phase.
*/
static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
kfree(elem);
}
static int nft_setelem_parse_flags(const struct nft_set *set,
const struct nlattr *attr, u32 *flags)
{
......@@ -3815,9 +3861,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
kfree(elem.priv);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_uninit(&data, d2.type);
nft_data_release(&data, d2.type);
err2:
nft_data_uninit(&elem.key.val, d1.type);
nft_data_release(&elem.key.val, d1.type);
err1:
return err;
}
......@@ -3862,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
return err;
}
/**
* nft_data_hold - hold a nft_data item
*
* @data: struct nft_data to release
* @type: type of data
*
* Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded,
* NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and
* NFT_GOTO verdicts. This function must be called on active data objects
* from the second phase of the commit protocol.
*/
static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
{
if (type == NFT_DATA_VERDICT) {
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
data->verdict.chain->use++;
break;
}
}
}
static void nft_set_elem_activate(const struct net *net,
const struct nft_set *set,
struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_hold(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
(*nft_set_ext_obj(ext))->use++;
}
static void nft_set_elem_deactivate(const struct net *net,
const struct nft_set *set,
struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
(*nft_set_ext_obj(ext))->use--;
}
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
......@@ -3927,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
kfree(elem.priv);
elem.priv = priv;
nft_set_elem_deactivate(ctx->net, set, &elem);
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
......@@ -3936,7 +4031,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
err3:
kfree(elem.priv);
err2:
nft_data_uninit(&elem.key.val, desc.type);
nft_data_release(&elem.key.val, desc.type);
err1:
return err;
}
......@@ -4743,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans)
nft_set_destroy(nft_trans_set(trans));
break;
case NFT_MSG_DELSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem(trans).priv, true);
nf_tables_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem(trans).priv);
break;
case NFT_MSG_DELOBJ:
nft_obj_destroy(nft_trans_obj(trans));
......@@ -4979,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
nft_set_elem_activate(net, te->set, &te->elem);
te->set->ops->activate(net, te->set, &te->elem);
te->set->ndeact--;
......@@ -5464,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx,
EXPORT_SYMBOL_GPL(nft_data_init);
/**
* nft_data_uninit - release a nft_data item
* nft_data_release - release a nft_data item
*
* @data: struct nft_data to release
* @type: type of data
......@@ -5472,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init);
* Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
* all others need to be released by calling this function.
*/
void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
void nft_data_release(const struct nft_data *data, enum nft_data_types type)
{
if (type < NFT_DATA_VERDICT)
return;
......@@ -5483,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
WARN_ON(1);
}
}
EXPORT_SYMBOL_GPL(nft_data_uninit);
EXPORT_SYMBOL_GPL(nft_data_release);
int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
enum nft_data_types type, unsigned int len)
......
......@@ -686,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
tuple_set = true;
}
ret = -ENOENT;
list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
cur = &nlcth->helper;
j++;
......@@ -699,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
tuple.dst.protonum != cur->tuple.dst.protonum))
continue;
if (refcount_dec_if_one(&cur->refcnt)) {
found = true;
nf_conntrack_helper_unregister(cur);
kfree(cur->expect_policy);
list_del(&nlcth->list);
kfree(nlcth);
} else {
ret = -EBUSY;
}
}
/* Make sure we return success if we flush and there is no helpers */
return (found || j == 0) ? 0 : -ENOENT;
return (found || j == 0) ? 0 : ret;
}
static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
......
......@@ -83,17 +83,26 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
if (d1.len != priv->len)
return -EINVAL;
if (d1.len != priv->len) {
err = -EINVAL;
goto err1;
}
err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2,
tb[NFTA_BITWISE_XOR]);
if (err < 0)
return err;
if (d2.len != priv->len)
return -EINVAL;
goto err1;
if (d2.len != priv->len) {
err = -EINVAL;
goto err2;
}
return 0;
err2:
nft_data_release(&priv->xor, d2.type);
err1:
nft_data_release(&priv->mask, d1.type);
return err;
}
static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
......
......@@ -201,10 +201,18 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
if (err < 0)
return ERR_PTR(err);
if (desc.type != NFT_DATA_VALUE) {
err = -EINVAL;
goto err1;
}
if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ)
return &nft_cmp_fast_ops;
else
return &nft_cmp_ops;
err1:
nft_data_release(&data, desc.type);
return ERR_PTR(-EINVAL);
}
struct nft_expr_type nft_cmp_type __read_mostly = {
......
......@@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj)
struct nft_ct_helper_obj *priv = nft_obj_data(obj);
if (priv->helper4)
module_put(priv->helper4->me);
nf_conntrack_helper_put(priv->helper4);
if (priv->helper6)
module_put(priv->helper6->me);
nf_conntrack_helper_put(priv->helper6);
}
static void nft_ct_helper_obj_eval(struct nft_object *obj,
......
......@@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
return 0;
err1:
nft_data_uninit(&priv->data, desc.type);
nft_data_release(&priv->data, desc.type);
return err;
}
......@@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
}
static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
......
......@@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
priv->len = desc_from.len;
return 0;
err2:
nft_data_uninit(&priv->data_to, desc_to.type);
nft_data_release(&priv->data_to, desc_to.type);
err1:
nft_data_uninit(&priv->data_from, desc_from.type);
nft_data_release(&priv->data_from, desc_from.type);
return err;
}
......
......@@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_elem elem;
int err;
err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
iter->err = err;
if (err)
return;
......
......@@ -283,28 +283,30 @@ static int xt_obj_to_user(u16 __user *psize, u16 size,
&U->u.user.revision, K->u.kernel.TYPE->revision)
int xt_data_to_user(void __user *dst, const void *src,
int usersize, int size)
int usersize, int size, int aligned_size)
{
usersize = usersize ? : size;
if (copy_to_user(dst, src, usersize))
return -EFAULT;
if (usersize != size && clear_user(dst + usersize, size - usersize))
if (usersize != aligned_size &&
clear_user(dst + usersize, aligned_size - usersize))
return -EFAULT;
return 0;
}
EXPORT_SYMBOL_GPL(xt_data_to_user);
#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \
#define XT_DATA_TO_USER(U, K, TYPE) \
xt_data_to_user(U->data, K->data, \
K->u.kernel.TYPE->usersize, \
C_SIZE ? : K->u.kernel.TYPE->TYPE##size)
K->u.kernel.TYPE->TYPE##size, \
XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
int xt_match_to_user(const struct xt_entry_match *m,
struct xt_entry_match __user *u)
{
return XT_OBJ_TO_USER(u, m, match, 0) ||
XT_DATA_TO_USER(u, m, match, 0);
XT_DATA_TO_USER(u, m, match);
}
EXPORT_SYMBOL_GPL(xt_match_to_user);
......@@ -312,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t,
struct xt_entry_target __user *u)
{
return XT_OBJ_TO_USER(u, t, target, 0) ||
XT_DATA_TO_USER(u, t, target, 0);
XT_DATA_TO_USER(u, t, target);
}
EXPORT_SYMBOL_GPL(xt_target_to_user);
......@@ -611,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \
xt_data_to_user(U->data, K->data, \
K->u.kernel.TYPE->usersize, \
C_SIZE, \
COMPAT_XT_ALIGN(C_SIZE))
int xt_compat_match_to_user(const struct xt_entry_match *m,
void __user **dstptr, unsigned int *size)
{
......@@ -626,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
if (match->compat_to_user((void __user *)cm->data, m->data))
return -EFAULT;
} else {
if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
return -EFAULT;
}
......@@ -972,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t,
if (target->compat_to_user((void __user *)ct->data, t->data))
return -EFAULT;
} else {
if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
return -EFAULT;
}
......
......@@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
if (help == NULL) {
module_put(helper->me);
nf_conntrack_helper_put(helper);
return -ENOMEM;
}
......@@ -263,7 +263,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
err4:
help = nfct_help(ct);
if (help)
module_put(help->helper->me);
nf_conntrack_helper_put(help->helper);
err3:
nf_ct_tmpl_free(ct);
err2:
......@@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
if (ct) {
help = nfct_help(ct);
if (help)
module_put(help->helper->me);
nf_conntrack_helper_put(help->helper);
nf_ct_netns_put(par->net, par->family);
......
......@@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
if (!help) {
module_put(helper->me);
nf_conntrack_helper_put(helper);
return -ENOMEM;
}
......@@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a)
static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
{
if (ct_info->helper)
module_put(ct_info->helper->me);
nf_conntrack_helper_put(ct_info->helper);
if (ct_info->ct)
nf_ct_tmpl_free(ct_info->ct);
}
......
......@@ -203,7 +203,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
*arg = (unsigned long) head;
rcu_assign_pointer(tp->root, new);
call_rcu(&head->rcu, mall_destroy_rcu);
return 0;
err_replace_hw_filter:
......
......@@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
long timeout;
int err;
struct vsock_transport_send_notify_data send_data;
DEFINE_WAIT(wait);
DEFINE_WAIT_FUNC(wait, woken_wake_function);
sk = sock->sk;
vsk = vsock_sk(sk);
......@@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out;
while (total_written < len) {
ssize_t written;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
add_wait_queue(sk_sleep(sk), &wait);
while (vsock_stream_has_space(vsk) == 0 &&
sk->sk_err == 0 &&
!(sk->sk_shutdown & SEND_SHUTDOWN) &&
......@@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/* Don't wait for non-blocking sockets. */
if (timeout == 0) {
err = -EAGAIN;
finish_wait(sk_sleep(sk), &wait);
remove_wait_queue(sk_sleep(sk), &wait);
goto out_err;
}
err = transport->notify_send_pre_block(vsk, &send_data);
if (err < 0) {
finish_wait(sk_sleep(sk), &wait);
remove_wait_queue(sk_sleep(sk), &wait);
goto out_err;
}
release_sock(sk);
timeout = schedule_timeout(timeout);
timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
finish_wait(sk_sleep(sk), &wait);
remove_wait_queue(sk_sleep(sk), &wait);
goto out_err;
} else if (timeout == 0) {
err = -EAGAIN;
finish_wait(sk_sleep(sk), &wait);
remove_wait_queue(sk_sleep(sk), &wait);
goto out_err;
}
prepare_to_wait(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
}
finish_wait(sk_sleep(sk), &wait);
remove_wait_queue(sk_sleep(sk), &wait);
/* These checks occur both as part of and after the loop
* conditional since we need to check before and after
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment