Commit 3c7a9f32 authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) In order to avoid problems in the future, make cgroup bpf overriding
    explicit using BPF_F_ALLOW_OVERRIDE. From Alexei Staovoitov.

 2) LLC sets skb->sk without proper skb->destructor and this explodes,
    fix from Eric Dumazet.

 3) Make sure when we have an ipv4 mapped source address, the
    destination is either also an ipv4 mapped address or
    ipv6_addr_any(). Fix from Jonathan T. Leighton.

 4) Avoid packet loss in fec driver by programming the multicast filter
    more intelligently. From Rui Sousa.

 5) Handle multiple threads invoking fanout_add(), fix from Eric
    Dumazet.

 6) Since we can invoke the TCP input path in process context, without
    BH being disabled, we have to accomodate that in the locking of the
    TCP probe. Also from Eric Dumazet.

 7) Fix erroneous emission of NETEVENT_DELAY_PROBE_TIME_UPDATE when we
    aren't even updating that sysctl value. From Marcus Huewe.

 8) Fix endian bugs in ibmvnic driver, from Thomas Falcon.

[ This is the second version of the pull that reverts the nested
  rhashtable changes that looked a bit too scary for this late in the
  release  - Linus ]

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (27 commits)
  rhashtable: Revert nested table changes.
  ibmvnic: Fix endian errors in error reporting output
  ibmvnic: Fix endian error when requesting device capabilities
  net: neigh: Fix netevent NETEVENT_DELAY_PROBE_TIME_UPDATE notification
  net: xilinx_emaclite: fix freezes due to unordered I/O
  net: xilinx_emaclite: fix receive buffer overflow
  bpf: kernel header files need to be copied into the tools directory
  tcp: tcp_probe: use spin_lock_bh()
  uapi: fix linux/if_pppol2tp.h userspace compilation errors
  packet: fix races in fanout_add()
  ibmvnic: Fix initial MTU settings
  net: ethernet: ti: cpsw: fix cpsw assignment in resume
  kcm: fix a null pointer dereference in kcm_sendmsg()
  net: fec: fix multicast filtering hardware setup
  ipv6: Handle IPv4-mapped src to in6addr_any dst.
  ipv6: Inhibit IPv4-mapped src address on the wire.
  net/mlx5e: Disable preemption when doing TC statistics upcall
  rhashtable: Add nested tables
  tipc: Fix tipc_sk_reinit race conditions
  gfs2: Use rhashtable walk interface in glock_hash_walk
  ...
parents 747ae0a9 bf3f14d6
......@@ -2478,12 +2478,11 @@ S: D-90453 Nuernberg
S: Germany
N: Arnaldo Carvalho de Melo
E: acme@ghostprotocols.net
E: acme@kernel.org
E: arnaldo.melo@gmail.com
E: acme@redhat.com
W: http://oops.ghostprotocols.net:81/blog/
P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
D: tools/, IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
S: Brazil
N: Karsten Merker
......
......@@ -877,8 +877,8 @@ S: Odd fixes
F: drivers/hwmon/applesmc.c
APPLETALK NETWORK LAYER
M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
S: Maintained
L: netdev@vger.kernel.org
S: Odd fixes
F: drivers/net/appletalk/
F: net/appletalk/
......@@ -6727,9 +6727,8 @@ S: Odd Fixes
F: drivers/tty/ipwireless/
IPX NETWORK LAYER
M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
L: netdev@vger.kernel.org
S: Maintained
S: Odd fixes
F: include/net/ipx.h
F: include/uapi/linux/ipx.h
F: net/ipx/
......@@ -7501,8 +7500,8 @@ S: Maintained
F: drivers/misc/lkdtm*
LLC (802.2)
M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
S: Maintained
L: netdev@vger.kernel.org
S: Odd fixes
F: include/linux/llc.h
F: include/uapi/linux/llc.h
F: include/net/llc*
......@@ -13373,10 +13372,8 @@ S: Maintained
F: drivers/input/misc/wistron_btns.c
WL3501 WIRELESS PCMCIA CARD DRIVER
M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
L: linux-wireless@vger.kernel.org
W: http://oops.ghostprotocols.net:81/blog
S: Maintained
S: Odd fixes
F: drivers/net/wireless/wl3501*
WOLFSON MICROELECTRONICS DRIVERS
......
......@@ -2910,6 +2910,7 @@ static void set_multicast_list(struct net_device *ndev)
struct netdev_hw_addr *ha;
unsigned int i, bit, data, crc, tmp;
unsigned char hash;
unsigned int hash_high = 0, hash_low = 0;
if (ndev->flags & IFF_PROMISC) {
tmp = readl(fep->hwp + FEC_R_CNTRL);
......@@ -2932,11 +2933,7 @@ static void set_multicast_list(struct net_device *ndev)
return;
}
/* Clear filter and add the addresses in hash register
*/
writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
/* Add the addresses in hash register */
netdev_for_each_mc_addr(ha, ndev) {
/* calculate crc32 value of mac address */
crc = 0xffffffff;
......@@ -2954,16 +2951,14 @@ static void set_multicast_list(struct net_device *ndev)
*/
hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f;
if (hash > 31) {
tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
tmp |= 1 << (hash - 32);
writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
} else {
tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW);
tmp |= 1 << hash;
writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
}
if (hash > 31)
hash_high |= 1 << (hash - 32);
else
hash_low |= 1 << hash;
}
writel(hash_high, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
writel(hash_low, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
}
/* Set a MAC change in hardware. */
......
......@@ -189,9 +189,10 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
}
ltb->map_id = adapter->map_id;
adapter->map_id++;
init_completion(&adapter->fw_done);
send_request_map(adapter, ltb->addr,
ltb->size, ltb->map_id);
init_completion(&adapter->fw_done);
wait_for_completion(&adapter->fw_done);
return 0;
}
......@@ -505,7 +506,7 @@ static int ibmvnic_open(struct net_device *netdev)
adapter->rx_pool = NULL;
rx_pool_arr_alloc_failed:
for (i = 0; i < adapter->req_rx_queues; i++)
napi_enable(&adapter->napi[i]);
napi_disable(&adapter->napi[i]);
alloc_napi_failed:
return -ENOMEM;
}
......@@ -1121,10 +1122,10 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token);
crq.request_statistics.len =
cpu_to_be32(sizeof(struct ibmvnic_statistics));
ibmvnic_send_crq(adapter, &crq);
/* Wait for data to be written */
init_completion(&adapter->stats_done);
ibmvnic_send_crq(adapter, &crq);
wait_for_completion(&adapter->stats_done);
for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
......@@ -1496,7 +1497,7 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
adapter->req_rx_queues = adapter->opt_rx_comp_queues;
adapter->req_rx_add_queues = adapter->max_rx_add_queues;
adapter->req_mtu = adapter->max_mtu;
adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN;
}
total_queues = adapter->req_tx_queues + adapter->req_rx_queues;
......@@ -2185,12 +2186,12 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
if (!found) {
dev_err(dev, "Couldn't find error id %x\n",
crq->request_error_rsp.error_id);
be32_to_cpu(crq->request_error_rsp.error_id));
return;
}
dev_err(dev, "Detailed info for error id %x:",
crq->request_error_rsp.error_id);
be32_to_cpu(crq->request_error_rsp.error_id));
for (i = 0; i < error_buff->len; i++) {
pr_cont("%02x", (int)error_buff->buff[i]);
......@@ -2269,8 +2270,8 @@ static void handle_error_indication(union ibmvnic_crq *crq,
dev_err(dev, "Firmware reports %serror id %x, cause %d\n",
crq->error_indication.
flags & IBMVNIC_FATAL_ERROR ? "FATAL " : "",
crq->error_indication.error_id,
crq->error_indication.error_cause);
be32_to_cpu(crq->error_indication.error_id),
be16_to_cpu(crq->error_indication.error_cause));
error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC);
if (!error_buff)
......@@ -2388,10 +2389,10 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
case PARTIALSUCCESS:
dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n",
*req_value,
(long int)be32_to_cpu(crq->request_capability_rsp.
(long int)be64_to_cpu(crq->request_capability_rsp.
number), name);
release_sub_crqs_no_irqs(adapter);
*req_value = be32_to_cpu(crq->request_capability_rsp.number);
*req_value = be64_to_cpu(crq->request_capability_rsp.number);
init_sub_crqs(adapter, 1);
return;
default:
......@@ -2626,12 +2627,12 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
break;
case MIN_MTU:
adapter->min_mtu = be64_to_cpu(crq->query_capability.number);
netdev->min_mtu = adapter->min_mtu;
netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu);
break;
case MAX_MTU:
adapter->max_mtu = be64_to_cpu(crq->query_capability.number);
netdev->max_mtu = adapter->max_mtu;
netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu);
break;
case MAX_MULTICAST_FILTERS:
......@@ -2799,9 +2800,9 @@ static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len,
crq.collect_fw_trace.correlator = adapter->ras_comps[num].correlator;
crq.collect_fw_trace.ioba = cpu_to_be32(trace_tok);
crq.collect_fw_trace.len = adapter->ras_comps[num].trace_buff_size;
ibmvnic_send_crq(adapter, &crq);
init_completion(&adapter->fw_done);
ibmvnic_send_crq(adapter, &crq);
wait_for_completion(&adapter->fw_done);
if (*ppos + len > be32_to_cpu(adapter->ras_comps[num].trace_buff_size))
......@@ -3581,9 +3582,9 @@ static int ibmvnic_dump_show(struct seq_file *seq, void *v)
memset(&crq, 0, sizeof(crq));
crq.request_dump_size.first = IBMVNIC_CRQ_CMD;
crq.request_dump_size.cmd = REQUEST_DUMP_SIZE;
ibmvnic_send_crq(adapter, &crq);
init_completion(&adapter->fw_done);
ibmvnic_send_crq(adapter, &crq);
wait_for_completion(&adapter->fw_done);
seq_write(seq, adapter->dump_data, adapter->dump_data_size);
......@@ -3629,8 +3630,8 @@ static void handle_crq_init_rsp(struct work_struct *work)
}
}
send_version_xchg(adapter);
reinit_completion(&adapter->init_done);
send_version_xchg(adapter);
if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
dev_err(dev, "Passive init timeout\n");
goto task_failed;
......@@ -3640,9 +3641,9 @@ static void handle_crq_init_rsp(struct work_struct *work)
if (adapter->renegotiate) {
adapter->renegotiate = false;
release_sub_crqs_no_irqs(adapter);
send_cap_queries(adapter);
reinit_completion(&adapter->init_done);
send_cap_queries(adapter);
if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
dev_err(dev, "Passive init timeout\n");
......@@ -3656,9 +3657,7 @@ static void handle_crq_init_rsp(struct work_struct *work)
goto task_failed;
netdev->real_num_tx_queues = adapter->req_tx_queues;
netdev->mtu = adapter->req_mtu;
netdev->min_mtu = adapter->min_mtu;
netdev->max_mtu = adapter->max_mtu;
netdev->mtu = adapter->req_mtu - ETH_HLEN;
if (adapter->failover) {
adapter->failover = false;
......@@ -3772,9 +3771,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
adapter->debugfs_dump = ent;
}
}
ibmvnic_send_crq_init(adapter);
init_completion(&adapter->init_done);
ibmvnic_send_crq_init(adapter);
if (!wait_for_completion_timeout(&adapter->init_done, timeout))
return 0;
......@@ -3782,9 +3781,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
if (adapter->renegotiate) {
adapter->renegotiate = false;
release_sub_crqs_no_irqs(adapter);
send_cap_queries(adapter);
reinit_completion(&adapter->init_done);
send_cap_queries(adapter);
if (!wait_for_completion_timeout(&adapter->init_done,
timeout))
return 0;
......@@ -3798,7 +3797,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
}
netdev->real_num_tx_queues = adapter->req_tx_queues;
netdev->mtu = adapter->req_mtu;
netdev->mtu = adapter->req_mtu - ETH_HLEN;
rc = register_netdev(netdev);
if (rc) {
......
......@@ -1087,10 +1087,14 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
preempt_disable();
tcf_exts_to_list(f->exts, &actions);
list_for_each_entry(a, &actions, list)
tcf_action_stats_update(a, bytes, packets, lastuse);
preempt_enable();
return 0;
}
......
......@@ -3160,7 +3160,7 @@ static int cpsw_resume(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct net_device *ndev = platform_get_drvdata(pdev);
struct cpsw_common *cpsw = netdev_priv(ndev);
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
/* Select default pin state */
pinctrl_pm_select_default_state(dev);
......
......@@ -100,6 +100,14 @@
/* BUFFER_ALIGN(adr) calculates the number of bytes to the next alignment. */
#define BUFFER_ALIGN(adr) ((ALIGNMENT - ((u32) adr)) % ALIGNMENT)
#ifdef __BIG_ENDIAN
#define xemaclite_readl ioread32be
#define xemaclite_writel iowrite32be
#else
#define xemaclite_readl ioread32
#define xemaclite_writel iowrite32
#endif
/**
* struct net_local - Our private per device data
* @ndev: instance of the network device
......@@ -156,15 +164,15 @@ static void xemaclite_enable_interrupts(struct net_local *drvdata)
u32 reg_data;
/* Enable the Tx interrupts for the first Buffer */
reg_data = __raw_readl(drvdata->base_addr + XEL_TSR_OFFSET);
__raw_writel(reg_data | XEL_TSR_XMIT_IE_MASK,
reg_data = xemaclite_readl(drvdata->base_addr + XEL_TSR_OFFSET);
xemaclite_writel(reg_data | XEL_TSR_XMIT_IE_MASK,
drvdata->base_addr + XEL_TSR_OFFSET);
/* Enable the Rx interrupts for the first buffer */
__raw_writel(XEL_RSR_RECV_IE_MASK, drvdata->base_addr + XEL_RSR_OFFSET);
xemaclite_writel(XEL_RSR_RECV_IE_MASK, drvdata->base_addr + XEL_RSR_OFFSET);
/* Enable the Global Interrupt Enable */
__raw_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET);
xemaclite_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET);
}
/**
......@@ -179,16 +187,16 @@ static void xemaclite_disable_interrupts(struct net_local *drvdata)
u32 reg_data;
/* Disable the Global Interrupt Enable */
__raw_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET);
xemaclite_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET);
/* Disable the Tx interrupts for the first buffer */
reg_data = __raw_readl(drvdata->base_addr + XEL_TSR_OFFSET);
__raw_writel(reg_data & (~XEL_TSR_XMIT_IE_MASK),
reg_data = xemaclite_readl(drvdata->base_addr + XEL_TSR_OFFSET);
xemaclite_writel(reg_data & (~XEL_TSR_XMIT_IE_MASK),
drvdata->base_addr + XEL_TSR_OFFSET);
/* Disable the Rx interrupts for the first buffer */
reg_data = __raw_readl(drvdata->base_addr + XEL_RSR_OFFSET);
__raw_writel(reg_data & (~XEL_RSR_RECV_IE_MASK),
reg_data = xemaclite_readl(drvdata->base_addr + XEL_RSR_OFFSET);
xemaclite_writel(reg_data & (~XEL_RSR_RECV_IE_MASK),
drvdata->base_addr + XEL_RSR_OFFSET);
}
......@@ -321,7 +329,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data,
byte_count = ETH_FRAME_LEN;
/* Check if the expected buffer is available */
reg_data = __raw_readl(addr + XEL_TSR_OFFSET);
reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET);
if ((reg_data & (XEL_TSR_XMIT_BUSY_MASK |
XEL_TSR_XMIT_ACTIVE_MASK)) == 0) {
......@@ -334,7 +342,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data,
addr = (void __iomem __force *)((u32 __force)addr ^
XEL_BUFFER_OFFSET);
reg_data = __raw_readl(addr + XEL_TSR_OFFSET);
reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET);
if ((reg_data & (XEL_TSR_XMIT_BUSY_MASK |
XEL_TSR_XMIT_ACTIVE_MASK)) != 0)
......@@ -345,16 +353,16 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data,
/* Write the frame to the buffer */
xemaclite_aligned_write(data, (u32 __force *) addr, byte_count);
__raw_writel((byte_count & XEL_TPLR_LENGTH_MASK),
xemaclite_writel((byte_count & XEL_TPLR_LENGTH_MASK),
addr + XEL_TPLR_OFFSET);
/* Update the Tx Status Register to indicate that there is a
* frame to send. Set the XEL_TSR_XMIT_ACTIVE_MASK flag which
* is used by the interrupt handler to check whether a frame
* has been transmitted */
reg_data = __raw_readl(addr + XEL_TSR_OFFSET);
reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET);
reg_data |= (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_XMIT_ACTIVE_MASK);
__raw_writel(reg_data, addr + XEL_TSR_OFFSET);
xemaclite_writel(reg_data, addr + XEL_TSR_OFFSET);
return 0;
}
......@@ -369,7 +377,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data,
*
* Return: Total number of bytes received
*/
static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data)
static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen)
{
void __iomem *addr;
u16 length, proto_type;
......@@ -379,7 +387,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data)
addr = (drvdata->base_addr + drvdata->next_rx_buf_to_use);
/* Verify which buffer has valid data */
reg_data = __raw_readl(addr + XEL_RSR_OFFSET);
reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET);
if ((reg_data & XEL_RSR_RECV_DONE_MASK) == XEL_RSR_RECV_DONE_MASK) {
if (drvdata->rx_ping_pong != 0)
......@@ -396,27 +404,28 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data)
return 0; /* No data was available */
/* Verify that buffer has valid data */
reg_data = __raw_readl(addr + XEL_RSR_OFFSET);
reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET);
if ((reg_data & XEL_RSR_RECV_DONE_MASK) !=
XEL_RSR_RECV_DONE_MASK)
return 0; /* No data was available */
}
/* Get the protocol type of the ethernet frame that arrived */
proto_type = ((ntohl(__raw_readl(addr + XEL_HEADER_OFFSET +
proto_type = ((ntohl(xemaclite_readl(addr + XEL_HEADER_OFFSET +
XEL_RXBUFF_OFFSET)) >> XEL_HEADER_SHIFT) &
XEL_RPLR_LENGTH_MASK);
/* Check if received ethernet frame is a raw ethernet frame
* or an IP packet or an ARP packet */
if (proto_type > (ETH_FRAME_LEN + ETH_FCS_LEN)) {
if (proto_type > ETH_DATA_LEN) {
if (proto_type == ETH_P_IP) {
length = ((ntohl(__raw_readl(addr +
length = ((ntohl(xemaclite_readl(addr +
XEL_HEADER_IP_LENGTH_OFFSET +
XEL_RXBUFF_OFFSET)) >>
XEL_HEADER_SHIFT) &
XEL_RPLR_LENGTH_MASK);
length = min_t(u16, length, ETH_DATA_LEN);
length += ETH_HLEN + ETH_FCS_LEN;
} else if (proto_type == ETH_P_ARP)
......@@ -429,14 +438,17 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data)
/* Use the length in the frame, plus the header and trailer */
length = proto_type + ETH_HLEN + ETH_FCS_LEN;
if (WARN_ON(length > maxlen))
length = maxlen;
/* Read from the EmacLite device */
xemaclite_aligned_read((u32 __force *) (addr + XEL_RXBUFF_OFFSET),
data, length);
/* Acknowledge the frame */
reg_data = __raw_readl(addr + XEL_RSR_OFFSET);
reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET);
reg_data &= ~XEL_RSR_RECV_DONE_MASK;
__raw_writel(reg_data, addr + XEL_RSR_OFFSET);
xemaclite_writel(reg_data, addr + XEL_RSR_OFFSET);
return length;
}
......@@ -463,14 +475,14 @@ static void xemaclite_update_address(struct net_local *drvdata,
xemaclite_aligned_write(address_ptr, (u32 __force *) addr, ETH_ALEN);
__raw_writel(ETH_ALEN, addr + XEL_TPLR_OFFSET);
xemaclite_writel(ETH_ALEN, addr + XEL_TPLR_OFFSET);
/* Update the MAC address in the EmacLite */
reg_data = __raw_readl(addr + XEL_TSR_OFFSET);
__raw_writel(reg_data | XEL_TSR_PROG_MAC_ADDR, addr + XEL_TSR_OFFSET);
reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET);
xemaclite_writel(reg_data | XEL_TSR_PROG_MAC_ADDR, addr + XEL_TSR_OFFSET);
/* Wait for EmacLite to finish with the MAC address update */
while ((__raw_readl(addr + XEL_TSR_OFFSET) &
while ((xemaclite_readl(addr + XEL_TSR_OFFSET) &
XEL_TSR_PROG_MAC_ADDR) != 0)
;
}
......@@ -603,7 +615,7 @@ static void xemaclite_rx_handler(struct net_device *dev)
skb_reserve(skb, 2);
len = xemaclite_recv_data(lp, (u8 *) skb->data);
len = xemaclite_recv_data(lp, (u8 *) skb->data, len);
if (!len) {
dev->stats.rx_errors++;
......@@ -640,31 +652,31 @@ static irqreturn_t xemaclite_interrupt(int irq, void *dev_id)
u32 tx_status;
/* Check if there is Rx Data available */
if ((__raw_readl(base_addr + XEL_RSR_OFFSET) &
if ((xemaclite_readl(base_addr + XEL_RSR_OFFSET) &
XEL_RSR_RECV_DONE_MASK) ||
(__raw_readl(base_addr + XEL_BUFFER_OFFSET + XEL_RSR_OFFSET)
(xemaclite_readl(base_addr + XEL_BUFFER_OFFSET + XEL_RSR_OFFSET)
& XEL_RSR_RECV_DONE_MASK))
xemaclite_rx_handler(dev);
/* Check if the Transmission for the first buffer is completed */
tx_status = __raw_readl(base_addr + XEL_TSR_OFFSET);
tx_status = xemaclite_readl(base_addr + XEL_TSR_OFFSET);
if (((tx_status & XEL_TSR_XMIT_BUSY_MASK) == 0) &&
(tx_status & XEL_TSR_XMIT_ACTIVE_MASK) != 0) {
tx_status &= ~XEL_TSR_XMIT_ACTIVE_MASK;
__raw_writel(tx_status, base_addr + XEL_TSR_OFFSET);
xemaclite_writel(tx_status, base_addr + XEL_TSR_OFFSET);
tx_complete = true;
}
/* Check if the Transmission for the second buffer is completed */
tx_status = __raw_readl(base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET);
tx_status = xemaclite_readl(base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET);
if (((tx_status & XEL_TSR_XMIT_BUSY_MASK) == 0) &&
(tx_status & XEL_TSR_XMIT_ACTIVE_MASK) != 0) {
tx_status &= ~XEL_TSR_XMIT_ACTIVE_MASK;
__raw_writel(tx_status, base_addr + XEL_BUFFER_OFFSET +
xemaclite_writel(tx_status, base_addr + XEL_BUFFER_OFFSET +
XEL_TSR_OFFSET);
tx_complete = true;
......@@ -698,7 +710,7 @@ static int xemaclite_mdio_wait(struct net_local *lp)
/* wait for the MDIO interface to not be busy or timeout
after some time.
*/
while (__raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) &
while (xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) &
XEL_MDIOCTRL_MDIOSTS_MASK) {
if (time_before_eq(end, jiffies)) {
WARN_ON(1);
......@@ -734,17 +746,17 @@ static int xemaclite_mdio_read(struct mii_bus *bus, int phy_id, int reg)
* MDIO Address register. Set the Status bit in the MDIO Control
* register to start a MDIO read transaction.
*/
ctrl_reg = __raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET);
__raw_writel(XEL_MDIOADDR_OP_MASK |
ctrl_reg = xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET);
xemaclite_writel(XEL_MDIOADDR_OP_MASK |
((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg),
lp->base_addr + XEL_MDIOADDR_OFFSET);
__raw_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK,
xemaclite_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK,
lp->base_addr + XEL_MDIOCTRL_OFFSET);
if (xemaclite_mdio_wait(lp))
return -ETIMEDOUT;
rc = __raw_readl(lp->base_addr + XEL_MDIORD_OFFSET);
rc = xemaclite_readl(lp->base_addr + XEL_MDIORD_OFFSET);
dev_dbg(&lp->ndev->dev,
"xemaclite_mdio_read(phy_id=%i, reg=%x) == %x\n",
......@@ -781,12 +793,12 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg,
* Data register. Finally, set the Status bit in the MDIO Control
* register to start a MDIO write transaction.
*/
ctrl_reg = __raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET);
__raw_writel(~XEL_MDIOADDR_OP_MASK &
ctrl_reg = xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET);
xemaclite_writel(~XEL_MDIOADDR_OP_MASK &
((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg),
lp->base_addr + XEL_MDIOADDR_OFFSET);
__raw_writel(val, lp->base_addr + XEL_MDIOWR_OFFSET);
__raw_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK,
xemaclite_writel(val, lp->base_addr + XEL_MDIOWR_OFFSET);
xemaclite_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK,
lp->base_addr + XEL_MDIOCTRL_OFFSET);
return 0;
......@@ -834,7 +846,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
/* Enable the MDIO bus by asserting the enable bit in MDIO Control
* register.
*/
__raw_writel(XEL_MDIOCTRL_MDIOEN_MASK,
xemaclite_writel(XEL_MDIOCTRL_MDIOEN_MASK,
lp->base_addr + XEL_MDIOCTRL_OFFSET);
bus = mdiobus_alloc();
......@@ -1140,8 +1152,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
}
/* Clear the Tx CSR's in case this is a restart */
__raw_writel(0, lp->base_addr + XEL_TSR_OFFSET);
__raw_writel(0, lp->base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET);
xemaclite_writel(0, lp->base_addr + XEL_TSR_OFFSET);
xemaclite_writel(0, lp->base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET);
/* Set the MAC address in the EmacLite device */
xemaclite_update_address(lp, ndev->dev_addr);
......
......@@ -113,10 +113,10 @@ struct xenvif_stats {
* A subset of struct net_device_stats that contains only the
* fields that are updated in netback.c for each queue.
*/
unsigned int rx_bytes;
unsigned int rx_packets;
unsigned int tx_bytes;
unsigned int tx_packets;
u64 rx_bytes;
u64 rx_packets;
u64 tx_bytes;
u64 tx_packets;
/* Additional stats used by xenvif */
unsigned long rx_gso_checksum_fixup;
......
......@@ -221,10 +221,10 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
unsigned long rx_bytes = 0;
unsigned long rx_packets = 0;
unsigned long tx_bytes = 0;
unsigned long tx_packets = 0;
u64 rx_bytes = 0;
u64 rx_packets = 0;
u64 tx_bytes = 0;
u64 tx_packets = 0;
unsigned int index;
spin_lock(&vif->lock);
......
......@@ -21,20 +21,19 @@ struct cgroup_bpf {
*/
struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
bool disallow_override[MAX_BPF_ATTACH_TYPE];
};
void cgroup_bpf_put(struct cgroup *cgrp);
void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
void __cgroup_bpf_update(struct cgroup *cgrp,
struct cgroup *parent,
struct bpf_prog *prog,
enum bpf_attach_type type);
int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
struct bpf_prog *prog, enum bpf_attach_type type,
bool overridable);
/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
void cgroup_bpf_update(struct cgroup *cgrp,
struct bpf_prog *prog,
enum bpf_attach_type type);
int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, bool overridable);
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
......
......@@ -116,6 +116,12 @@ enum bpf_attach_type {
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_PSEUDO_MAP_FD 1
/* flags for BPF_MAP_UPDATE_ELEM command */
......@@ -171,6 +177,7 @@ union bpf_attr {
__u32 target_fd; /* container object to attach to */
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
};
} __attribute__((aligned(8)));
......
......@@ -9,9 +9,8 @@
#include <linux/types.h>
#include <linux/socket.h>
#ifndef __KERNEL__
#include <netinet/in.h>
#endif
#include <linux/in.h>
#include <linux/in6.h>
#define IPPROTO_L2TP 115
......@@ -31,7 +30,7 @@ struct sockaddr_l2tpip {
__u32 l2tp_conn_id; /* Connection ID of tunnel */
/* Pad to size of `struct sockaddr'. */
unsigned char __pad[sizeof(struct sockaddr) -
unsigned char __pad[__SOCK_SIZE__ -
sizeof(__kernel_sa_family_t) -
sizeof(__be16) - sizeof(struct in_addr) -
sizeof(__u32)];
......
......@@ -52,6 +52,7 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
e = rcu_dereference_protected(parent->bpf.effective[type],
lockdep_is_held(&cgroup_mutex));
rcu_assign_pointer(cgrp->bpf.effective[type], e);
cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
}
}
......@@ -82,30 +83,63 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
*
* Must be called with cgroup_mutex held.
*/
void __cgroup_bpf_update(struct cgroup *cgrp,
struct cgroup *parent,
struct bpf_prog *prog,
enum bpf_attach_type type)
int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
struct bpf_prog *prog, enum bpf_attach_type type,
bool new_overridable)
{
struct bpf_prog *old_prog, *effective;
struct bpf_prog *old_prog, *effective = NULL;
struct cgroup_subsys_state *pos;
bool overridable = true;
if (parent) {
overridable = !parent->bpf.disallow_override[type];
effective = rcu_dereference_protected(parent->bpf.effective[type],
lockdep_is_held(&cgroup_mutex));
}
old_prog = xchg(cgrp->bpf.prog + type, prog);
if (prog && effective && !overridable)
/* if parent has non-overridable prog attached, disallow
* attaching new programs to descendent cgroup
*/
return -EPERM;
if (prog && effective && overridable != new_overridable)
/* if parent has overridable prog attached, only
* allow overridable programs in descendent cgroup
*/
return -EPERM;
effective = (!prog && parent) ?
rcu_dereference_protected(parent->bpf.effective[type],
lockdep_is_held(&cgroup_mutex)) :
prog;
old_prog = cgrp->bpf.prog[type];
if (prog) {
overridable = new_overridable;
effective = prog;
if (old_prog &&
cgrp->bpf.disallow_override[type] == new_overridable)
/* disallow attaching non-overridable on top
* of existing overridable in this cgroup
* and vice versa
*/
return -EPERM;
}
if (!prog && !old_prog)
/* report error when trying to detach and nothing is attached */
return -ENOENT;
cgrp->bpf.prog[type] = prog;
css_for_each_descendant_pre(pos, &cgrp->self) {
struct cgroup *desc = container_of(pos, struct cgroup, self);
/* skip the subtree if the descendant has its own program */
if (desc->bpf.prog[type] && desc != cgrp)
if (desc->bpf.prog[type] && desc != cgrp) {
pos = css_rightmost_descendant(pos);
else
} else {
rcu_assign_pointer(desc->bpf.effective[type],
effective);
desc->bpf.disallow_override[type] = !overridable;
}
}
if (prog)
......@@ -115,6 +149,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key);
}
return 0;
}
/**
......
......@@ -920,13 +920,14 @@ static int bpf_obj_get(const union bpf_attr *attr)
#ifdef CONFIG_CGROUP_BPF
#define BPF_PROG_ATTACH_LAST_FIELD attach_type
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
static int bpf_prog_attach(const union bpf_attr *attr)
{
enum bpf_prog_type ptype;
struct bpf_prog *prog;
struct cgroup *cgrp;
enum bpf_prog_type ptype;
int ret;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
......@@ -934,6 +935,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL;
if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
return -EINVAL;
switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
......@@ -956,10 +960,13 @@ static int bpf_prog_attach(const union bpf_attr *attr)
return PTR_ERR(cgrp);
}
cgroup_bpf_update(cgrp, prog, attr->attach_type);
ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
if (ret)
bpf_prog_put(prog);
cgroup_put(cgrp);
return 0;
return ret;
}
#define BPF_PROG_DETACH_LAST_FIELD attach_type
......@@ -967,6 +974,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
static int bpf_prog_detach(const union bpf_attr *attr)
{
struct cgroup *cgrp;
int ret;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
......@@ -982,7 +990,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
cgroup_bpf_update(cgrp, NULL, attr->attach_type);
ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
cgroup_put(cgrp);
break;
......@@ -990,7 +998,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return -EINVAL;
}
return 0;
return ret;
}
#endif /* CONFIG_CGROUP_BPF */
......
......@@ -6498,15 +6498,16 @@ static __init int cgroup_namespaces_init(void)
subsys_initcall(cgroup_namespaces_init);
#ifdef CONFIG_CGROUP_BPF
void cgroup_bpf_update(struct cgroup *cgrp,
struct bpf_prog *prog,
enum bpf_attach_type type)
int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, bool overridable)
{
struct cgroup *parent = cgroup_parent(cgrp);
int ret;
mutex_lock(&cgroup_mutex);
__cgroup_bpf_update(cgrp, parent, prog, type);
ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
mutex_unlock(&cgroup_mutex);
return ret;
}
#endif /* CONFIG_CGROUP_BPF */
......
......@@ -2923,6 +2923,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
return;
set_bit(index, p->data_state);
if (index == NEIGH_VAR_DELAY_PROBE_TIME)
call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
if (!dev) /* NULL dev means this is default value */
neigh_copy_dflt_parms(net, p, index);
......
......@@ -1263,7 +1263,7 @@ void __init arp_init(void)
/*
* ax25 -> ASCII conversion
*/
static char *ax2asc2(ax25_address *a, char *buf)
static void ax2asc2(ax25_address *a, char *buf)
{
char c, *s;
int n;
......@@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf)
*s++ = n + '0';
*s++ = '\0';
if (*buf == '\0' || *buf == '-')
return "*";
return buf;
if (*buf == '\0' || *buf == '-') {
buf[0] = '*';
buf[1] = '\0';
}
}
#endif /* CONFIG_AX25 */
......@@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
}
#endif
sprintf(tbuf, "%pI4", n->primary_key);
seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n",
tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
read_unlock(&n->lock);
}
......
......@@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
(fwmark > 0 && skb->mark == fwmark)) &&
(full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
spin_lock(&tcp_probe.lock);
spin_lock_bh(&tcp_probe.lock);
/* If log fills, just silently drop */
if (tcp_probe_avail() > 1) {
struct tcp_log *p = tcp_probe.log + tcp_probe.head;
......@@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
}
tcp_probe.lastcwnd = tp->snd_cwnd;
spin_unlock(&tcp_probe.lock);
spin_unlock_bh(&tcp_probe.lock);
wake_up(&tcp_probe.wait);
}
......
......@@ -167,18 +167,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
if (np->sndflow)
fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
addr_type = ipv6_addr_type(&usin->sin6_addr);
if (addr_type == IPV6_ADDR_ANY) {
if (ipv6_addr_any(&usin->sin6_addr)) {
/*
* connect to self
*/
usin->sin6_addr.s6_addr[15] = 0x01;
if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
&usin->sin6_addr);
else
usin->sin6_addr = in6addr_loopback;
}
addr_type = ipv6_addr_type(&usin->sin6_addr);
daddr = &usin->sin6_addr;
if (addr_type == IPV6_ADDR_MAPPED) {
if (addr_type & IPV6_ADDR_MAPPED) {
struct sockaddr_in sin;
if (__ipv6_only_sock(sk)) {
......
......@@ -1021,6 +1021,9 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
}
}
#endif
if (ipv6_addr_v4mapped(&fl6->saddr) &&
!(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr)))
return -EAFNOSUPPORT;
return 0;
......
......@@ -148,8 +148,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
* connect() to INADDR_ANY means loopback (BSD'ism).
*/
if (ipv6_addr_any(&usin->sin6_addr))
usin->sin6_addr.s6_addr[15] = 0x1;
if (ipv6_addr_any(&usin->sin6_addr)) {
if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
&usin->sin6_addr);
else
usin->sin6_addr = in6addr_loopback;
}
addr_type = ipv6_addr_type(&usin->sin6_addr);
......@@ -188,7 +193,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
* TCP over IPv4
*/
if (addr_type == IPV6_ADDR_MAPPED) {
if (addr_type & IPV6_ADDR_MAPPED) {
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
......
......@@ -1033,6 +1033,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
daddr = &sin6->sin6_addr;
if (ipv6_addr_any(daddr) &&
ipv6_addr_v4mapped(&np->saddr))
ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
daddr);
break;
case AF_INET:
goto do_udp_sendmsg;
......
......@@ -1044,9 +1044,11 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
} else {
/* Message not complete, save state */
partial_message:
if (head) {
kcm->seq_skb = head;
kcm_tx_msg(head)->last_skb = skb;
}
}
KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
......
......@@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
* another trick required to cope with how the PROCOM state
* machine works. -acme
*/
skb_orphan(skb);
sock_hold(sk);
skb->sk = sk;
skb->destructor = sock_efree;
}
if (!sock_owned_by_user(sk))
llc_conn_rcv(sk, skb);
......
......@@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
ev->type = LLC_SAP_EV_TYPE_PDU;
ev->reason = 0;
skb_orphan(skb);
sock_hold(sk);
skb->sk = sk;
skb->destructor = sock_efree;
llc_sap_state_process(sap, skb);
}
......
......@@ -1619,6 +1619,7 @@ static void fanout_release_data(struct packet_fanout *f)
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
struct packet_rollover *rollover = NULL;
struct packet_sock *po = pkt_sk(sk);
struct packet_fanout *f, *match;
u8 type = type_flags & 0xff;
......@@ -1641,23 +1642,28 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
return -EINVAL;
}
mutex_lock(&fanout_mutex);
err = -EINVAL;
if (!po->running)
return -EINVAL;
goto out;
err = -EALREADY;
if (po->fanout)
return -EALREADY;
goto out;
if (type == PACKET_FANOUT_ROLLOVER ||
(type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL);
if (!po->rollover)
return -ENOMEM;
atomic_long_set(&po->rollover->num, 0);
atomic_long_set(&po->rollover->num_huge, 0);
atomic_long_set(&po->rollover->num_failed, 0);
err = -ENOMEM;
rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
if (!rollover)
goto out;
atomic_long_set(&rollover->num, 0);
atomic_long_set(&rollover->num_huge, 0);
atomic_long_set(&rollover->num_failed, 0);
po->rollover = rollover;
}
mutex_lock(&fanout_mutex);
match = NULL;
list_for_each_entry(f, &fanout_list, list) {
if (f->id == id &&
......@@ -1704,11 +1710,11 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
}
}
out:
mutex_unlock(&fanout_mutex);
if (err) {
kfree(po->rollover);
if (err && rollover) {
kfree(rollover);
po->rollover = NULL;
}
mutex_unlock(&fanout_mutex);
return err;
}
......@@ -1717,11 +1723,9 @@ static void fanout_release(struct sock *sk)
struct packet_sock *po = pkt_sk(sk);
struct packet_fanout *f;
f = po->fanout;
if (!f)
return;
mutex_lock(&fanout_mutex);
f = po->fanout;
if (f) {
po->fanout = NULL;
if (atomic_dec_and_test(&f->sk_ref)) {
......@@ -1730,10 +1734,11 @@ static void fanout_release(struct sock *sk)
fanout_release_data(f);
kfree(f);
}
mutex_unlock(&fanout_mutex);
if (po->rollover)
kfree_rcu(po->rollover, rcu);
}
mutex_unlock(&fanout_mutex);
}
static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
......
......@@ -104,7 +104,7 @@ static int attach_filter(int cg_fd, int type, int verdict)
return EXIT_FAILURE;
}
ret = bpf_prog_attach(prog_fd, cg_fd, type);
ret = bpf_prog_attach(prog_fd, cg_fd, type, 0);
if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno));
......
......@@ -79,11 +79,12 @@ int main(int argc, char **argv)
if (join_cgroup(FOO))
goto err;
if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) {
if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo");
goto err;
}
printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
assert(system(PING_CMD) != 0);
/* Create cgroup /foo/bar, get fd, and join it */
......@@ -94,24 +95,27 @@ int main(int argc, char **argv)
if (join_cgroup(BAR))
goto err;
printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) {
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0);
if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
log_err("Detaching program from /foo/bar");
goto err;
}
printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
"This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) {
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
......@@ -121,8 +125,60 @@ int main(int argc, char **argv)
goto err;
}
printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
"This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0);
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
errno = 0;
log_err("Unexpected success attaching prog to /foo/bar");
goto err;
}
if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
log_err("Detaching program from /foo/bar");
goto err;
}
if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
errno = 0;
log_err("Unexpected success in double detach from /foo");
goto err;
}
if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
log_err("Attaching non-overridable prog to /foo");
goto err;
}
if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
errno = 0;
log_err("Unexpected success attaching non-overridable prog to /foo/bar");
goto err;
}
if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
errno = 0;
log_err("Unexpected success attaching overridable prog to /foo/bar");
goto err;
}
if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
errno = 0;
log_err("Unexpected success attaching overridable prog to /foo");
goto err;
}
if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
log_err("Attaching different non-overridable prog to /foo");
goto err;
}
goto out;
err:
......@@ -132,5 +188,9 @@ int main(int argc, char **argv)
close(foo);
close(bar);
cleanup_cgroup_environment();
if (!rc)
printf("PASS\n");
else
printf("FAIL\n");
return rc;
}
......@@ -75,7 +75,7 @@ int main(int argc, char **argv)
return EXIT_FAILURE;
}
ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0);
if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno));
......
......@@ -55,7 +55,7 @@ int main(int argc, char **argv)
}
ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
BPF_CGROUP_INET_SOCK_CREATE);
BPF_CGROUP_INET_SOCK_CREATE, 0);
if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno));
......
......@@ -116,6 +116,12 @@ enum bpf_attach_type {
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_PSEUDO_MAP_FD 1
/* flags for BPF_MAP_UPDATE_ELEM command */
......@@ -171,6 +177,7 @@ union bpf_attr {
__u32 target_fd; /* container object to attach to */
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
};
} __attribute__((aligned(8)));
......
......@@ -168,7 +168,8 @@ int bpf_obj_get(const char *pathname)
return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
}
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
unsigned int flags)
{
union bpf_attr attr;
......@@ -176,6 +177,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
attr.attach_flags = flags;
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}
......
......@@ -41,7 +41,8 @@ int bpf_map_delete_elem(int fd, void *key);
int bpf_map_get_next_key(int fd, void *key, void *next_key);
int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
unsigned int flags);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment