// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/rculist.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_police.h>
#include <net/tc_act/tc_sample.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
#include <net/tc_act/tc_gate.h>
#include <net/flow_offload.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

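/* Classifiers that have started teardown are tracked in the block's
 * proto_destroy_ht, keyed by (chain index, prio, protocol):
 * tcf_proto_signal_destroying() adds an entry, tcf_proto_exists_destroying()
 * lets a concurrent insert of an identical triple detect the race, and
 * tcf_proto_signal_destroyed() drops the entry once teardown is finished.
 */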
static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
{
	return jhash_3words(tp->chain->index, tp->prio,
			    (__force __u32)tp->protocol, 0);
}

static void tcf_proto_signal_destroying(struct tcf_chain *chain,
					struct tcf_proto *tp)
{
	struct tcf_block *block = chain->block;

	mutex_lock(&block->proto_destroy_lock);
	hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
		     destroy_obj_hashfn(tp));
	mutex_unlock(&block->proto_destroy_lock);
}

static bool tcf_proto_cmp(const struct tcf_proto *tp1,
			  const struct tcf_proto *tp2)
{
	return tp1->chain->index == tp2->chain->index &&
	       tp1->prio == tp2->prio &&
	       tp1->protocol == tp2->protocol;
}

static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
					struct tcf_proto *tp)
{
	u32 hash = destroy_obj_hashfn(tp);
	struct tcf_proto *iter;
	bool found = false;

	rcu_read_lock();
	hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
				   destroy_ht_node, hash) {
		if (tcf_proto_cmp(tp, iter)) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}

static void
tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
{
	struct tcf_block *block = chain->block;

	mutex_lock(&block->proto_destroy_lock);
	if (hash_hashed(&tp->destroy_ht_node))
		hash_del_rcu(&tp->destroy_ht_node);
	mutex_unlock(&block->proto_destroy_lock);
}

/* Find classifier type by string name */

static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}

static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
		     struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;

	ops = __tcf_proto_lookup_ops(kind);
	if (ops)
		return ops;
#ifdef CONFIG_MODULES
	if (rtnl_held)
		rtnl_unlock();
	request_module("cls_%s", kind);
	if (rtnl_held)
		rtnl_lock();
	ops = __tcf_proto_lookup_ops(kind);
	/* We dropped the RTNL semaphore in order to perform
	 * the module load. So, even if we succeeded in loading
	 * the module we have to replay the request. We indicate
	 * this using -EAGAIN.
	 */
	if (ops) {
		module_put(ops->owner);
		return ERR_PTR(-EAGAIN);
	}
#endif
	NL_SET_ERR_MSG(extack, "TC classifier not found");
	return ERR_PTR(-ENOENT);
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);

bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
	INIT_RCU_WORK(rwork, func);
	return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);
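/* Typical classifier usage (illustrative sketch, names are not from this
 * file): embed a struct rcu_work in the filter and defer freeing until
 * after an RCU grace period, e.g.
 *
 *	tcf_queue_work(&f->rwork, my_filter_destroy_work);
 *
 * unregister_tcf_proto_ops() flushes tc_filter_wq, so queued work finishes
 * before a classifier module goes away.
 */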

/* Select new prio value from the range, managed by kernel. */

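/* If a filter is passed in, the allocated value is TC_H_MAJ(tp->prio - 1);
 * with an empty chain it is TC_H_MAJ(0xC0000000U).
 */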
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}

static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
{
	if (kind)
		return nla_strscpy(name, kind, IFNAMSIZ) < 0;
	memset(name, 0, IFNAMSIZ);
	return false;
}

static bool tcf_proto_is_unlocked(const char *kind)
{
	const struct tcf_proto_ops *ops;
	bool ret;

	if (strlen(kind) == 0)
		return false;

	ops = tcf_proto_lookup_ops(kind, false, NULL);
	/* On error return false to take rtnl lock. Proto lookup/create
	 * functions will perform lookup again and properly handle errors.
	 */
	if (IS_ERR(ops))
		return false;

	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
	module_put(ops->owner);
	return ret;
}

static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, struct tcf_chain *chain,
					  bool rtnl_held,
					  struct netlink_ext_ack *extack)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
	if (IS_ERR(tp->ops)) {
		err = PTR_ERR(tp->ops);
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->chain = chain;
	spin_lock_init(&tp->lock);
	refcount_set(&tp->refcnt, 1);

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}

static void tcf_proto_get(struct tcf_proto *tp)
{
	refcount_inc(&tp->refcnt);
}

static void tcf_chain_put(struct tcf_chain *chain);

static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
			      bool sig_destroy, struct netlink_ext_ack *extack)
{
	tp->ops->destroy(tp, rtnl_held, extack);
	if (sig_destroy)
		tcf_proto_signal_destroyed(tp->chain, tp);
	tcf_chain_put(tp->chain);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}

static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
			  struct netlink_ext_ack *extack)
{
	if (refcount_dec_and_test(&tp->refcnt))
		tcf_proto_destroy(tp, rtnl_held, true, extack);
}

static bool tcf_proto_check_delete(struct tcf_proto *tp)
{
	if (tp->ops->delete_empty)
		return tp->ops->delete_empty(tp);

	tp->deleting = true;
	return tp->deleting;
}

static void tcf_proto_mark_delete(struct tcf_proto *tp)
{
	spin_lock(&tp->lock);
	tp->deleting = true;
	spin_unlock(&tp->lock);
}

static bool tcf_proto_is_deleting(struct tcf_proto *tp)
{
	bool deleting;

	spin_lock(&tp->lock);
	deleting = tp->deleting;
	spin_unlock(&tp->lock);

	return deleting;
}

#define ASSERT_BLOCK_LOCKED(block)					\
	lockdep_assert_held(&(block)->lock)

struct tcf_filter_chain_list_item {
	struct list_head list;
	tcf_chain_head_change_t *chain_head_change;
	void *chain_head_change_priv;
};

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	list_add_tail_rcu(&chain->list, &block->chain_list);
	mutex_init(&chain->filter_chain_lock);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	if (!chain->index)
		block->chain0.chain = chain;
	return chain;
}

static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
				       struct tcf_proto *tp_head)
{
	if (item->chain_head_change)
		item->chain_head_change(tp_head, item->chain_head_change_priv);
}

static void tcf_chain0_head_change(struct tcf_chain *chain,
				   struct tcf_proto *tp_head)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_block *block = chain->block;

	if (chain->index)
		return;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
		tcf_chain_head_change_item(item, tp_head);
	mutex_unlock(&block->lock);
}

/* Returns true if block can be safely freed. */

static bool tcf_chain_detach(struct tcf_chain *chain)
{
	struct tcf_block *block = chain->block;

	ASSERT_BLOCK_LOCKED(block);

	list_del_rcu(&chain->list);
	if (!chain->index)
		block->chain0.chain = NULL;

	if (list_empty(&block->chain_list) &&
	    refcount_read(&block->refcnt) == 0)
		return true;

	return false;
}

static void tcf_block_destroy(struct tcf_block *block)
{
	mutex_destroy(&block->lock);
	mutex_destroy(&block->proto_destroy_lock);
	kfree_rcu(block, rcu);
}

static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
{
	struct tcf_block *block = chain->block;

	mutex_destroy(&chain->filter_chain_lock);
	kfree_rcu(chain, rcu);
	if (free_block)
		tcf_block_destroy(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	++chain->refcnt;
}

static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
	ASSERT_BLOCK_LOCKED(chain->block);

	/* In case all the references are action references, this
	 * chain should not be shown to the user.
	 */
	return chain->refcnt == chain->action_refcnt;
}

static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	ASSERT_BLOCK_LOCKED(block);

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index)
			return chain;
	}
	return NULL;
}

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
					      u32 chain_index)
{
	struct tcf_chain *chain;

	list_for_each_entry_rcu(chain, &block->chain_list, list) {
		if (chain->index == chain_index)
			return chain;
	}
	return NULL;
}
#endif

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast);

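/* Look up a chain by index under block->lock and take a reference to it.
 * With 'create' a missing chain is allocated; with 'by_act' the reference
 * is accounted as an action reference. A RTM_NEWCHAIN notification is sent
 * only when the first non-action reference is taken.
 */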
static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
					 u32 chain_index, bool create,
					 bool by_act)
{
	struct tcf_chain *chain = NULL;
	bool is_first_reference;

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (chain) {
		tcf_chain_hold(chain);
	} else {
		if (!create)
			goto errout;
		chain = tcf_chain_create(block, chain_index);
		if (!chain)
			goto errout;
	}

	if (by_act)
		++chain->action_refcnt;
	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
	mutex_unlock(&block->lock);

	/* Send notification only in case we got the first
	 * non-action reference. Until then, the chain acts only as
	 * a placeholder for actions pointing to it and user ought
	 * not know about them.
	 */
	if (is_first_reference && !by_act)
		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);

	return chain;

errout:
	mutex_unlock(&block->lock);
	return chain;
}

static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				       bool create)
{
	return __tcf_chain_get(block, chain_index, create, false);
}

struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
	return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv);
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast);

static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
			    bool explicitly_created)
{
	struct tcf_block *block = chain->block;
	const struct tcf_proto_ops *tmplt_ops;
	bool free_block = false;
	unsigned int refcnt;
	void *tmplt_priv;

	mutex_lock(&block->lock);
	if (explicitly_created) {
		if (!chain->explicitly_created) {
			mutex_unlock(&block->lock);
			return;
		}
		chain->explicitly_created = false;
	}

	if (by_act)
		chain->action_refcnt--;

	/* tc_chain_notify_delete can't be called while holding block lock.
	 * However, when block is unlocked chain can be changed concurrently, so
	 * save these to temporary variables.
	 */
	refcnt = --chain->refcnt;
	tmplt_ops = chain->tmplt_ops;
	tmplt_priv = chain->tmplt_priv;

	/* The last dropped non-action reference will trigger notification. */
	if (refcnt - chain->action_refcnt == 0 && !by_act) {
		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
				       block, NULL, 0, 0, false);
		/* Last reference to chain, no need to lock. */
		chain->flushing = false;
	}

	if (refcnt == 0)
		free_block = tcf_chain_detach(chain);
	mutex_unlock(&block->lock);

	if (refcnt == 0) {
		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
		tcf_chain_destroy(chain, free_block);
	}
}

static void tcf_chain_put(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, false);
}

void tcf_chain_put_by_act(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);

static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
	__tcf_chain_put(chain, false, true);
}

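/* Unhook every classifier from the chain under filter_chain_lock, marking
 * each one as being destroyed, then drop the proto references outside the
 * lock.
 */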
static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
	struct tcf_proto *tp, *tp_next;

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_dereference(chain->filter_chain, chain);
	while (tp) {
		tp_next = rcu_dereference_protected(tp->next, 1);
		tcf_proto_signal_destroying(chain, tp);
		tp = tp_next;
	}
	tp = tcf_chain_dereference(chain->filter_chain, chain);
	RCU_INIT_POINTER(chain->filter_chain, NULL);
	tcf_chain0_head_change(chain, NULL);
	chain->flushing = true;
	mutex_unlock(&chain->filter_chain_lock);

	while (tp) {
		tp_next = rcu_dereference_protected(tp->next, 1);
		tcf_proto_put(tp, rtnl_held, NULL);
		tp = tp_next;
	}
}

static int tcf_block_setup(struct tcf_block *block,
			   struct flow_block_offload *bo);

static void tcf_block_offload_init(struct flow_block_offload *bo,
				   struct net_device *dev, struct Qdisc *sch,
				   enum flow_block_command command,
				   enum flow_block_binder_type binder_type,
				   struct flow_block *flow_block,
				   bool shared, struct netlink_ext_ack *extack)
{
	bo->net = dev_net(dev);
	bo->command = command;
	bo->binder_type = binder_type;
	bo->block = flow_block;
	bo->block_shared = shared;
	bo->extack = extack;
	bo->sch = sch;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void tcf_block_unbind(struct tcf_block *block,
			     struct flow_block_offload *bo);

static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct tcf_block *block = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;
	struct Qdisc *sch = block_cb->indr.sch;
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo = {};

	tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
			       block_cb->indr.binder_type,
			       &block->flow_block, tcf_block_shared(block),
			       &extack);
	rtnl_lock();
	down_write(&block->cb_lock);
	list_del(&block_cb->driver_list);
	list_move(&block_cb->list, &bo.cb_list);
	tcf_block_unbind(block, &bo);
	up_write(&block->cb_lock);
	rtnl_unlock();
}

static bool tcf_block_offload_in_use(struct tcf_block *block)
{
	return atomic_read(&block->offloadcnt);
}

static int tcf_block_offload_cmd(struct tcf_block *block,
				 struct net_device *dev, struct Qdisc *sch,
				 struct tcf_block_ext_info *ei,
				 enum flow_block_command command,
				 struct netlink_ext_ack *extack)
{
	struct flow_block_offload bo = {};

	tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
			       &block->flow_block, tcf_block_shared(block),
			       extack);

	if (dev->netdev_ops->ndo_setup_tc) {
		int err;

		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
		if (err < 0) {
			if (err != -EOPNOTSUPP)
				NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
			return err;
		}

		return tcf_block_setup(block, &bo);
	}

	flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
				    tc_block_indr_cleanup);
	tcf_block_setup(block, &bo);

	return -EOPNOTSUPP;
}

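/* Bind the block to a device for offload. Without an ndo_setup_tc callback
 * the bind is offered to indirect block handlers; -EOPNOTSUPP from the setup
 * path makes the block fall back to software (counted in nooffloaddevcnt),
 * unless offloaded filters already exist on the block.
 */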
static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
				  struct tcf_block_ext_info *ei,
				  struct netlink_ext_ack *extack)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	down_write(&block->cb_lock);

	/* If tc offload feature is disabled and the block we try to bind
	 * to already has some offloaded filters, forbid to bind.
	 */
	if (dev->netdev_ops->ndo_setup_tc &&
	    !tc_can_offload(dev) &&
	    tcf_block_offload_in_use(block)) {
		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
		err = -EOPNOTSUPP;
		goto err_unlock;
	}

	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_inc;
	if (err)
		goto err_unlock;

	up_write(&block->cb_lock);
	return 0;

no_offload_dev_inc:
	if (tcf_block_offload_in_use(block))
		goto err_unlock;

	err = 0;
	block->nooffloaddevcnt++;
err_unlock:
	up_write(&block->cb_lock);
	return err;
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
				     struct tcf_block_ext_info *ei)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	down_write(&block->cb_lock);
	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_dec;
	up_write(&block->cb_lock);
	return;

no_offload_dev_dec:
	WARN_ON(block->nooffloaddevcnt-- == 0);
	up_write(&block->cb_lock);
}

static int
tcf_chain0_head_change_cb_add(struct tcf_block *block,
			      struct tcf_block_ext_info *ei,
			      struct netlink_ext_ack *extack)
{
	struct tcf_filter_chain_list_item *item;
	struct tcf_chain *chain0;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item) {
		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
		return -ENOMEM;
	}
	item->chain_head_change = ei->chain_head_change;
	item->chain_head_change_priv = ei->chain_head_change_priv;

	mutex_lock(&block->lock);
	chain0 = block->chain0.chain;
	if (chain0)
		tcf_chain_hold(chain0);
	else
		list_add(&item->list, &block->chain0.filter_chain_list);
	mutex_unlock(&block->lock);

	if (chain0) {
		struct tcf_proto *tp_head;

		mutex_lock(&chain0->filter_chain_lock);

		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
		if (tp_head)
			tcf_chain_head_change_item(item, tp_head);

		mutex_lock(&block->lock);
		list_add(&item->list, &block->chain0.filter_chain_list);
		mutex_unlock(&block->lock);

		mutex_unlock(&chain0->filter_chain_lock);
		tcf_chain_put(chain0);
	}

	return 0;
}

static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
			      struct tcf_block_ext_info *ei)
{
	struct tcf_filter_chain_list_item *item;

	mutex_lock(&block->lock);
	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
		    (item->chain_head_change == ei->chain_head_change &&
		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
			if (block->chain0.chain)
				tcf_chain_head_change_item(item, NULL);
			list_del(&item->list);
			mutex_unlock(&block->lock);

			kfree(item);
			return;
		}
	}
	mutex_unlock(&block->lock);
	WARN_ON(1);
}

struct tcf_net {
	spinlock_t idr_lock; /* Protects idr */
	struct idr idr;
};

static unsigned int tcf_net_id;

static int tcf_block_insert(struct tcf_block *block, struct net *net,
			    struct netlink_ext_ack *extack)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);
	int err;

	idr_preload(GFP_KERNEL);
	spin_lock(&tn->idr_lock);
	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
			    GFP_NOWAIT);
	spin_unlock(&tn->idr_lock);
	idr_preload_end();

	return err;
}

static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock(&tn->idr_lock);
	idr_remove(&tn->idr, block->index);
	spin_unlock(&tn->idr_lock);
}

static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	block = kzalloc(sizeof(*block), GFP_KERNEL);
	if (!block) {
		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
		return ERR_PTR(-ENOMEM);
	}
	mutex_init(&block->lock);
	mutex_init(&block->proto_destroy_lock);
	init_rwsem(&block->cb_lock);
	flow_block_init(&block->flow_block);
	INIT_LIST_HEAD(&block->chain_list);
	INIT_LIST_HEAD(&block->owner_list);
	INIT_LIST_HEAD(&block->chain0.filter_chain_list);

	refcount_set(&block->refcnt, 1);
	block->net = net;
	block->index = block_index;

	/* Don't store q pointer for blocks which are shared */
	if (!tcf_block_shared(block))
		block->q = q;
	return block;
}

static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	return idr_find(&tn->idr, block_index);
}

static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
	struct tcf_block *block;

	rcu_read_lock();
	block = tcf_block_lookup(net, block_index);
	if (block && !refcount_inc_not_zero(&block->refcnt))
		block = NULL;
	rcu_read_unlock();

	return block;
}

static struct tcf_chain *
__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	mutex_lock(&block->lock);
	if (chain)
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);
	else
		chain = list_first_entry_or_null(&block->chain_list,
						 struct tcf_chain, list);

	/* skip all action-only chains */
	while (chain && tcf_chain_held_by_acts_only(chain))
		chain = list_is_last(&chain->list, &block->chain_list) ?
			NULL : list_next_entry(chain, list);

	if (chain)
		tcf_chain_hold(chain);
	mutex_unlock(&block->lock);

	return chain;
}

/* Function to be used by all clients that want to iterate over all chains on
 * block. It properly obtains block->lock and takes reference to chain before
 * returning it. Users of this function must be tolerant to concurrent chain
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that all netlink dump callbacks cannot guarantee to provide
 * consistent dump because rtnl lock is released each time skb is filled with
 * data and sent to user-space.
 */

struct tcf_chain *
tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);

	if (chain)
		tcf_chain_put(chain);

	return chain_next;
}
EXPORT_SYMBOL(tcf_get_next_chain);
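/* Usage sketch: the iterator handles references itself, so walking all
 * user-visible chains is simply
 *
 *	for (chain = tcf_get_next_chain(block, NULL); chain;
 *	     chain = tcf_get_next_chain(block, chain))
 *		...;
 *
 * passing the previous chain back in releases its reference.
 */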

static struct tcf_proto *
__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
	u32 prio = 0;

	ASSERT_RTNL();
	mutex_lock(&chain->filter_chain_lock);

	if (!tp) {
		tp = tcf_chain_dereference(chain->filter_chain, chain);
	} else if (tcf_proto_is_deleting(tp)) {
		/* 'deleting' flag is set and chain->filter_chain_lock was
		 * unlocked, which means next pointer could be invalid. Restart
		 * search.
		 */
		prio = tp->prio + 1;
		tp = tcf_chain_dereference(chain->filter_chain, chain);

		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
			if (!tp->deleting && tp->prio >= prio)
				break;
	} else {
		tp = tcf_chain_dereference(tp->next, chain);
	}

	if (tp)
		tcf_proto_get(tp);

	mutex_unlock(&chain->filter_chain_lock);

	return tp;
}

/* Function to be used by all clients that want to iterate over all tp's on
 * chain. Users of this function must be tolerant to concurrent tp
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that all netlink dump callbacks cannot guarantee to provide
 * consistent dump because rtnl lock is released each time skb is filled with
 * data and sent to user-space.
 */

struct tcf_proto *
tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);

	if (tp)
		tcf_proto_put(tp, true, NULL);

	return tp_next;
}
EXPORT_SYMBOL(tcf_get_next_proto);

static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
{
	struct tcf_chain *chain;

	/* Last reference to block. At this point chains cannot be added or
	 * removed concurrently.
	 */
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		tcf_chain_put_explicitly_created(chain);
		tcf_chain_flush(chain, rtnl_held);
	}
}

/* Lookup Qdisc and increments its reference counter.
 * Set parent, if necessary.
 */

static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
			    u32 *parent, int ifindex, bool rtnl_held,
			    struct netlink_ext_ack *extack)
{
	const struct Qdisc_class_ops *cops;
	struct net_device *dev;
	int err = 0;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		return 0;

	rcu_read_lock();

	/* Find link */
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	/* Find qdisc */
	if (!*parent) {
		*q = dev->qdisc;
		*parent = (*q)->handle;
	} else {
		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
		if (!*q) {
			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
			err = -EINVAL;
			goto errout_rcu;
		}
	}

	*q = qdisc_refcount_inc_nz(*q);
	if (!*q) {
		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
		err = -EINVAL;
		goto errout_rcu;
	}

	/* Is it classful? */
	cops = (*q)->ops->cl_ops;
	if (!cops) {
		NL_SET_ERR_MSG(extack, "Qdisc not classful");
		err = -EINVAL;
		goto errout_qdisc;
	}

	if (!cops->tcf_block) {
		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
		err = -EOPNOTSUPP;
		goto errout_qdisc;
	}

errout_rcu:
	/* At this point we know that qdisc is not noop_qdisc,
	 * which means that qdisc holds a reference to net_device
	 * and we hold a reference to qdisc, so it is safe to release
	 * rcu read lock.
	 */
	rcu_read_unlock();
	return err;

errout_qdisc:
	rcu_read_unlock();

	if (rtnl_held)
		qdisc_put(*q);
	else
		qdisc_put_unlocked(*q);
	*q = NULL;

	return err;
}

static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
			       int ifindex, struct netlink_ext_ack *extack)
{
	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		return 0;

	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(parent)) {
		const struct Qdisc_class_ops *cops = q->ops->cl_ops;

		*cl = cops->find(q, parent);
		if (*cl == 0) {
			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
			return -ENOENT;
		}
	}

	return 0;
}

static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
					  unsigned long cl, int ifindex,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, block_index);
		if (!block) {
			NL_SET_ERR_MSG(extack, "Block of given index was not found");
			return ERR_PTR(-EINVAL);
		}
	} else {
		const struct Qdisc_class_ops *cops = q->ops->cl_ops;

		block = cops->tcf_block(q, cl, extack);
		if (!block)
			return ERR_PTR(-EINVAL);

		if (tcf_block_shared(block)) {
			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
			return ERR_PTR(-EOPNOTSUPP);
		}

		/* Always take reference to block in order to support execution
		 * of rules update path of cls API without rtnl lock. Caller
		 * must release block when it is finished using it. 'if' block
		 * of this conditional obtain reference to block by calling
		 * tcf_block_refcnt_get().
		 */
		refcount_inc(&block->refcnt);
	}

	return block;
}

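/* Drop a reference to the block. On the last put a shared block is removed
 * from the per-netns IDR, offloads are unbound, and the block is either
 * freed directly (empty chain_list) or freed when its last flushed chain
 * goes away.
 */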
static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
			    struct tcf_block_ext_info *ei, bool rtnl_held)
{
	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
		/* Flushing/putting all chains will cause the block to be
		 * deallocated when last chain is freed. However, if chain_list
		 * is empty, block has to be manually deallocated. After block
		 * reference counter reached 0, it is no longer possible to
		 * increment it or add new chains to block.
		 */
		bool free_block = list_empty(&block->chain_list);

		mutex_unlock(&block->lock);
		if (tcf_block_shared(block))
			tcf_block_remove(block, block->net);

		if (q)
			tcf_block_offload_unbind(block, q, ei);

		if (free_block)
			tcf_block_destroy(block);
		else
			tcf_block_flush_all_chains(block, rtnl_held);
	} else if (q) {
		tcf_block_offload_unbind(block, q, ei);
	}
}

static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
{
	__tcf_block_put(block, NULL, NULL, rtnl_held);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
					u32 *parent, unsigned long *cl,
					int ifindex, u32 block_index,
					struct netlink_ext_ack *extack)
{
	struct tcf_block *block;
	int err = 0;

	ASSERT_RTNL();

	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
	if (err)
		goto errout;

	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
	if (err)
		goto errout_qdisc;

	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout_qdisc;
	}

	return block;

errout_qdisc:
	if (*q)
		qdisc_put(*q);
errout:
	*q = NULL;
	return ERR_PTR(err);
}

static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
			      bool rtnl_held)
{
	if (!IS_ERR_OR_NULL(block))
		tcf_block_refcnt_put(block, rtnl_held);

	if (q) {
		if (rtnl_held)
			qdisc_put(q);
		else
			qdisc_put_unlocked(q);
	}
}

struct tcf_block_owner_item {
	struct list_head list;
	struct Qdisc *q;
	enum flow_block_binder_type binder_type;
};

static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	if (block->keep_dst &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		netif_keep_dst(qdisc_dev(q));
}

void tcf_block_netif_keep_dst(struct tcf_block *block)
{
	struct tcf_block_owner_item *item;

	block->keep_dst = true;
	list_for_each_entry(item, &block->owner_list, list)
		tcf_block_owner_netif_keep_dst(block, item->q,
					       item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

static int tcf_block_owner_add(struct tcf_block *block,
			       struct Qdisc *q,
			       enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return -ENOMEM;
	item->q = q;
	item->binder_type = binder_type;
	list_add(&item->list, &block->owner_list);
	return 0;
}

static void tcf_block_owner_del(struct tcf_block *block,
				struct Qdisc *q,
				enum flow_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	list_for_each_entry(item, &block->owner_list, list) {
		if (item->q == q && item->binder_type == binder_type) {
			list_del(&item->list);
			kfree(item);
			return;
		}
	}
	WARN_ON(1);
}

int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei,
		      struct netlink_ext_ack *extack)
{
	struct net *net = qdisc_net(q);
	struct tcf_block *block = NULL;
	int err;

	if (ei->block_index)
		/* block_index not 0 means the shared block is requested */
		block = tcf_block_refcnt_get(net, ei->block_index);

	if (!block) {
		block = tcf_block_create(net, q, ei->block_index, extack);
		if (IS_ERR(block))
			return PTR_ERR(block);
		if (tcf_block_shared(block)) {
			err = tcf_block_insert(block, net, extack);
			if (err)
				goto err_block_insert;
		}
	}

	err = tcf_block_owner_add(block, q, ei->binder_type);
	if (err)
		goto err_block_owner_add;

	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

	err = tcf_chain0_head_change_cb_add(block, ei, extack);
	if (err)
		goto err_chain0_head_change_cb_add;

	err = tcf_block_offload_bind(block, q, ei, extack);
	if (err)
		goto err_block_offload_bind;

	*p_block = block;
	return 0;

err_block_offload_bind:
	tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
	tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
	tcf_block_refcnt_put(block, true);
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
		  struct netlink_ext_ack *extack)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	if (!block)
		return;
	tcf_chain0_head_change_cb_del(block, ei);
	tcf_block_owner_del(block, q, ei->binder_type);

	__tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
	struct tcf_block_ext_info ei = {0, };

	if (!block)
		return;
	tcf_block_put_ext(block, block->q, &ei);
}

EXPORT_SYMBOL(tcf_block_put);

static int
tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
			    void *cb_priv, bool add, bool offload_in_use,
			    struct netlink_ext_ack *extack)
{
	struct tcf_chain *chain, *chain_prev;
	struct tcf_proto *tp, *tp_prev;
	int err;

	lockdep_assert_held(&block->cb_lock);

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		for (tp = __tcf_get_next_proto(chain, NULL); tp;
		     tp_prev = tp,
			     tp = __tcf_get_next_proto(chain, tp),
			     tcf_proto_put(tp_prev, true, NULL)) {
			if (tp->ops->reoffload) {
				err = tp->ops->reoffload(tp, add, cb, cb_priv,
							 extack);
				if (err && add)
					goto err_playback_remove;
			} else if (add && offload_in_use) {
				err = -EOPNOTSUPP;
				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
				goto err_playback_remove;
			}
		}
	}

	return 0;

err_playback_remove:
	tcf_proto_put(tp, true, NULL);
	tcf_chain_put(chain);
	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
				    extack);
	return err;
}

static int tcf_block_bind(struct tcf_block *block,
			  struct flow_block_offload *bo)
{
	struct flow_block_cb *block_cb, *next;
	int err, i = 0;

	lockdep_assert_held(&block->cb_lock);

	list_for_each_entry(block_cb, &bo->cb_list, list) {
		err = tcf_block_playback_offloads(block, block_cb->cb,
						  block_cb->cb_priv, true,
						  tcf_block_offload_in_use(block),
						  bo->extack);
		if (err)
			goto err_unroll;
		if (!bo->unlocked_driver_cb)
			block->lockeddevcnt++;

		i++;
	}
	list_splice(&bo->cb_list, &block->flow_block.cb_list);

	return 0;

err_unroll:
	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
		if (i-- > 0) {
			list_del(&block_cb->list);
			tcf_block_playback_offloads(block, block_cb->cb,
						    block_cb->cb_priv, false,
						    tcf_block_offload_in_use(block),
						    NULL);
			if (!bo->unlocked_driver_cb)
				block->lockeddevcnt--;
		}
		flow_block_cb_free(block_cb);
	}

	return err;
}

static void tcf_block_unbind(struct tcf_block *block,
			     struct flow_block_offload *bo)
{
	struct flow_block_cb *block_cb, *next;

	lockdep_assert_held(&block->cb_lock);

	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
		tcf_block_playback_offloads(block, block_cb->cb,
					    block_cb->cb_priv, false,
					    tcf_block_offload_in_use(block),
					    NULL);
		list_del(&block_cb->list);
		flow_block_cb_free(block_cb);
		if (!bo->unlocked_driver_cb)
			block->lockeddevcnt--;
	}
}

static int tcf_block_setup(struct tcf_block *block,
			   struct flow_block_offload *bo)
{
	int err;

	switch (bo->command) {
	case FLOW_BLOCK_BIND:
		err = tcf_block_bind(block, bo);
		break;
	case FLOW_BLOCK_UNBIND:
		err = 0;
		tcf_block_unbind(block, bo);
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
static inline int __tcf_classify(struct sk_buff *skb,
				 const struct tcf_proto *tp,
				 const struct tcf_proto *orig_tp,
				 struct tcf_result *res,
				 bool compat_mode,
				 u32 *last_executed_chain)
{
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 16;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		__be16 protocol = skb_protocol(skb, false);
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			*last_executed_chain = first_tp->chain->index;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->chain->block->index,
				       tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	goto reclassify;
#endif
}

int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	u32 last_executed_chain = 0;

	return __tcf_classify(skb, tp, tp, res, compat_mode,
			      &last_executed_chain);
}
EXPORT_SYMBOL(tcf_classify);

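/* Ingress variant of tcf_classify(). With CONFIG_NET_TC_SKB_EXT, a chain
 * index carried in the skb's TC_SKB_EXT extension restarts classification
 * from that chain on the ingress block (the extension is consumed); if the
 * walk then ends in TC_ACT_UNSPEC after executing some chain, the last
 * executed chain index is stored back in a newly allocated extension.
 */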
int tcf_classify_ingress(struct sk_buff *skb,
			 const struct tcf_block *ingress_block,
			 const struct tcf_proto *tp,
			 struct tcf_result *res, bool compat_mode)
{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 last_executed_chain = 0;

	return __tcf_classify(skb, tp, tp, res, compat_mode,
			      &last_executed_chain);
#else
	u32 last_executed_chain = tp ? tp->chain->index : 0;
	const struct tcf_proto *orig_tp = tp;
	struct tc_skb_ext *ext;
	int ret;

	ext = skb_ext_find(skb, TC_SKB_EXT);

	if (ext && ext->chain) {
		struct tcf_chain *fchain;

		fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
		if (!fchain)
			return TC_ACT_SHOT;

		/* Consume, so cloned/redirect skbs won't inherit ext */
		skb_ext_del(skb, TC_SKB_EXT);

		tp = rcu_dereference_bh(fchain->filter_chain);
		last_executed_chain = fchain->index;
	}

	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
			     &last_executed_chain);

	/* If we missed on some chain */
	if (ret == TC_ACT_UNSPEC && last_executed_chain) {
		ext = tc_skb_ext_alloc(skb);
		if (WARN_ON_ONCE(!ext))
			return TC_ACT_SHOT;
		ext->chain = last_executed_chain;
		ext->mru = qdisc_skb_cb(skb)->mru;
		ext->post_ct = qdisc_skb_cb(skb)->post_ct;
	}

	return ret;
#endif
}
EXPORT_SYMBOL(tcf_classify_ingress);

struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};

static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info)
{
	return tcf_chain_dereference(*chain_info->pprev, chain);
}

static int tcf_chain_tp_insert(struct tcf_chain *chain,
			       struct tcf_chain_info *chain_info,
			       struct tcf_proto *tp)
{
	if (chain->flushing)
		return -EAGAIN;

	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain0_head_change(chain, tp);
	tcf_proto_get(tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);

	return 0;
}

static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);

	tcf_proto_mark_delete(tp);
	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate);

/* Try to insert new proto.
 * If proto with specified priority already exists, free new proto
 * and return existing one.
 */

static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
						    struct tcf_proto *tp_new,
						    u32 protocol, u32 prio,
						    bool rtnl_held)
{
	struct tcf_chain_info chain_info;
	struct tcf_proto *tp;
	int err = 0;

	mutex_lock(&chain->filter_chain_lock);

	if (tcf_proto_exists_destroying(chain, tp_new)) {
		mutex_unlock(&chain->filter_chain_lock);
		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
		return ERR_PTR(-EAGAIN);
	}

	tp = tcf_chain_tp_find(chain, &chain_info,
			       protocol, prio, false);
	if (!tp)
		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
	mutex_unlock(&chain->filter_chain_lock);

	if (tp) {
		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
		tp_new = tp;
	} else if (err) {
		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
		tp_new = ERR_PTR(err);
	}

	return tp_new;
}

static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
				      struct tcf_proto *tp, bool rtnl_held,
				      struct netlink_ext_ack *extack)
{
	struct tcf_chain_info chain_info;
	struct tcf_proto *tp_iter;
	struct tcf_proto **pprev;
	struct tcf_proto *next;

	mutex_lock(&chain->filter_chain_lock);

	/* Atomically find and remove tp from chain. */
	for (pprev = &chain->filter_chain;
	     (tp_iter = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp_iter->next) {
		if (tp_iter == tp) {
			chain_info.pprev = pprev;
			chain_info.next = tp_iter->next;
			WARN_ON(tp_iter->deleting);
			break;
		}
	}
	/* Verify that tp still exists and no new filters were inserted
	 * concurrently.
	 * Mark tp for deletion if it is empty.
	 */
	if (!tp_iter || !tcf_proto_check_delete(tp)) {
		mutex_unlock(&chain->filter_chain_lock);
		return;
	}

	tcf_proto_signal_destroying(chain, tp);
	next = tcf_chain_dereference(chain_info.next, chain);
	if (tp == chain->filter_chain)
		tcf_chain0_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info.pprev, next);
	mutex_unlock(&chain->filter_chain_lock);

	tcf_proto_put(tp, rtnl_held, extack);
}

static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = tcf_chain_dereference(*pprev, chain));
	     pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	if (tp) {
		chain_info->next = tp->next;
		tcf_proto_get(tp);
	} else {
		chain_info->next = NULL;
	}
	return tp;
}

static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct tcf_block *block,
			 struct Qdisc *q, u32 parent, void *fh,
			 u32 portid, u32 seq, u16 flags, int event,
			 bool terse_dump, bool rtnl_held)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	if (q) {
		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
		tcm->tcm_parent = parent;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else if (terse_dump) {
		if (tp->ops->terse_dump) {
			if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
						rtnl_held) < 0)
				goto nla_put_failure;
		} else {
			goto cls_op_not_supp;
		}
	} else {
		if (tp->ops->dump &&
		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
cls_op_not_supp:
	nlmsg_trim(skb, b);
	return -1;
}

static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  struct tcf_block *block, struct Qdisc *q,
			  u32 parent, void *fh, int event, bool unicast,
			  bool rtnl_held)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, event,
			  false, rtnl_held) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     n->nlmsg_flags & NLM_F_ECHO);

	if (err > 0)
		err = 0;
	return err;
}

static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      struct tcf_block *block, struct Qdisc *q,
			      u32 parent, void *fh, bool unicast, bool *last,
			      bool rtnl_held, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
			  false, rtnl_held) <= 0) {
		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     n->nlmsg_flags & NLM_F_ECHO);
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");

	if (err > 0)
		err = 0;
	return err;
}

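/* Notify about every filter on the chain, e.g. before the chain is flushed. */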
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct tcf_block *block, struct Qdisc *q,
				 u32 parent, struct nlmsghdr *n,
				 struct tcf_chain *chain, int event)
{
	struct tcf_proto *tp;

	for (tp = tcf_get_next_proto(chain, NULL);
	     tp; tp = tcf_get_next_proto(chain, tp))
		tfilter_notify(net, oskb, n, tp, block,
			       q, parent, NULL, event, false, true);
}

static void tfilter_put(struct tcf_proto *tp, void *fh)
{
	if (tp->ops->put && fh)
		tp->ops->put(tp, fh);
}

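/* Handle RTM_NEWTFILTER: create or update a filter. This is the path taken,
 * for example, by "tc filter add dev eth0 ingress protocol ip flower ...".
 */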
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	char name[IFNAMSIZ];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;
	bool rtnl_held = false;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	tp = NULL;
	cl = 0;
	block = NULL;

	if (prio == 0) {
		/* If no priority is provided by the user,
		 * we allocate one.
		 */
		if (n->nlmsg_flags & NLM_F_CREATE) {
			prio = TC_H_MAKE(0x80000000U, 0U);
			prio_allocate = true;
		} else {
			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
		err = -EINVAL;
		goto errout;
	}

	/* Take rtnl mutex if rtnl_held was set to true on previous iteration,
	 * block is shared (no qdisc found), qdisc is not unlocked, classifier
	 * type is not specified, classifier is not unlocked.
	 */
	if (rtnl_held ||
	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tcf_proto_is_unlocked(name)) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}
	block->classid = parent;

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, true);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
		err = -ENOMEM;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = PTR_ERR(tp);
		goto errout_locked;
	}

	if (tp == NULL) {
		struct tcf_proto *tp_new = NULL;

		if (chain->flushing) {
			err = -EAGAIN;
			goto errout_locked;
		}

		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
			err = -EINVAL;
			goto errout_locked;
		}

		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout_locked;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
							       &chain_info));

		mutex_unlock(&chain->filter_chain_lock);
		tp_new = tcf_proto_create(name, protocol, prio, chain,
					  rtnl_held, extack);
		if (IS_ERR(tp_new)) {
			err = PTR_ERR(tp_new);
			goto errout_tp;
		}

		tp_created = 1;
		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
						rtnl_held);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout_tp;
		}
	} else {
		mutex_unlock(&chain->filter_chain_lock);
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout;
		}
	} else if (n->nlmsg_flags & NLM_F_EXCL) {
		tfilter_put(tp, fh);
		NL_SET_ERR_MSG(extack, "Filter already exists");
		err = -EEXIST;
		goto errout;
	}

	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
		err = -EINVAL;
		goto errout;
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
			      rtnl_held, extack);
	if (err == 0) {
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_NEWTFILTER, false, rtnl_held);
		tfilter_put(tp, fh);
		/* q pointer is NULL for shared blocks */
		if (q)
			q->flags &= ~TCQ_F_CAN_BYPASS;
	}

errout:
	if (err && tp_created)
		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
errout_tp:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		if (!tp_created)
			tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	if (err == -EAGAIN) {
		/* Take rtnl lock in case EAGAIN is caused by concurrent flush
		 * of target chain.
		 */
		rtnl_held = true;
		/* Replay the request. */
		goto replay;
	}
	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

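/* Handle RTM_DELTFILTER: delete a single filter, or flush a whole chain when
 * no priority is given.
 */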
static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	char name[IFNAMSIZ];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
		NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
		err = -EINVAL;
		goto errout;
	}
	/* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
	 * found), qdisc is not unlocked, classifier type is not specified,
	 * classifier is not unlocked.
	 */
	if (!prio ||
	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tcf_proto_is_unlocked(name)) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		/* User requested flush on non-existent chain. Nothing to do,
		 * so just return success.
		 */
		if (prio == 0) {
			err = 0;
			goto errout;
		}
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -ENOENT;
		goto errout;
	}

	if (prio == 0) {
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER);
		tcf_chain_flush(chain, rtnl_held);
		err = 0;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout_locked;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout_locked;
	} else if (t->tcm_handle == 0) {
		tcf_proto_signal_destroying(chain, tp);
		tcf_chain_tp_remove(chain, &chain_info, tp);
		mutex_unlock(&chain->filter_chain_lock);

		tcf_proto_put(tp, rtnl_held, NULL);
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_DELTFILTER, false, rtnl_held);
		err = 0;
		goto errout;
	}
	mutex_unlock(&chain->filter_chain_lock);

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		bool last;

		err = tfilter_del_notify(net, skb, n, tp, block,
					 q, parent, fh, false, &last,
					 rtnl_held, extack);

		if (err)
			goto errout;
		if (last)
			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
	}

errout:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;

errout_locked:
	mutex_unlock(&chain->filter_chain_lock);
	goto errout;
}

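/* Handle RTM_GETTFILTER: look up one filter and unicast it back to the
 * requesting socket.
 */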
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	char name[IFNAMSIZ];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block = NULL;
	struct tcf_proto *tp = NULL;
	unsigned long cl = 0;
	void *fh = NULL;
	int err;
	bool rtnl_held = false;

	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	parent = t->tcm_parent;

	if (prio == 0) {
		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
		return -ENOENT;
	}

	/* Find head of filter chain. */

	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
	if (err)
		return err;

	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
		err = -EINVAL;
		goto errout;
	}
	/* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
	 * unlocked, classifier type is not specified, classifier is not
	 * unlocked.
	 */
	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
	    !tcf_proto_is_unlocked(name)) {
		rtnl_held = true;
		rtnl_lock();
	}

	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
	if (err)
		goto errout;

	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
				 extack);
	if (IS_ERR(block)) {
		err = PTR_ERR(block);
		goto errout;
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index, false);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = -EINVAL;
		goto errout;
	}

	mutex_lock(&chain->filter_chain_lock);
	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, false);
	mutex_unlock(&chain->filter_chain_lock);
	if (!tp || IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = tp ? PTR_ERR(tp) : -ENOENT;
		goto errout;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
		err = -ENOENT;
	} else {
		err = tfilter_notify(net, skb, n, tp, block, q, parent,
				     fh, RTM_NEWTFILTER, true, rtnl_held);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
	}

	tfilter_put(tp, fh);
errout:
	if (chain) {
		if (tp && !IS_ERR(tp))
			tcf_proto_put(tp, rtnl_held, NULL);
		tcf_chain_put(chain);
	}
	tcf_block_release(q, block, rtnl_held);

	if (rtnl_held)
		rtnl_unlock();

	return err;
}

struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct tcf_block *block;
	struct Qdisc *q;
	u32 parent;
	bool terse_dump;
};

static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
			     n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER, a->terse_dump, true);
}

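/* Dump all filters of one chain; returns false if the skb ran out of space. */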
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index, bool terse)
{
	struct net *net = sock_net(skb->sk);
	struct tcf_block *block = chain->block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_proto *tp, *tp_prev;
	struct tcf_dump_args arg;

	for (tp = __tcf_get_next_proto(chain, NULL);
	     tp;
	     tp_prev = tp,
		     tp = __tcf_get_next_proto(chain, tp),
		     tcf_proto_put(tp_prev, true, NULL),
		     (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER, false, true) <= 0)
				goto errout;
			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		arg.w.cookie = cb->args[2];
		arg.terse_dump = terse;
		tp->ops->walk(tp, &arg.w, true);
		cb->args[2] = arg.w.cookie;
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			goto errout;
	}
	return true;

errout:
	tcf_proto_put(tp, true, NULL);
	return false;
}

static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
	[TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
};

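/* Dump filters for RTM_GETTFILTER with NLM_F_DUMP; cb->args[] keeps the
 * position so the dump can be resumed across multiple calls.
 */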
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcf_chain *chain, *chain_prev;
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	bool terse_dump = false;
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     tcf_tfilter_dump_policy, cb->extack);
	if (err)
		return err;

	if (tca[TCA_DUMP_FLAGS]) {
		struct nla_bitfield32 flags =
			nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);

		terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent)
			q = dev->qdisc;
		else
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		parent = block->classid;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	for (chain = __tcf_get_next_chain(block, NULL);
	     chain;
	     chain_prev = chain,
		     chain = __tcf_get_next_chain(block, chain),
		     tcf_chain_put(chain_prev)) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index, terse_dump)) {
			tcf_chain_put(chain);
			err = -EMSGSIZE;
			break;
		}
	}

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

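/* Encode a chain (and its template, if any) into a netlink message. */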
static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
			      void *tmplt_priv, u32 chain_index,
			      struct net *net, struct sk_buff *skb,
			      struct tcf_block *block,
			      u32 portid, u32 seq, u16 flags, int event)
{
	unsigned char *b = skb_tail_pointer(skb);
	const struct tcf_proto_ops *ops;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	void *priv;

	ops = tmplt_ops;
	priv = tmplt_priv;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_handle = 0;
	if (block->q) {
		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
		tcm->tcm_parent = block->q->handle;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}

	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
		goto nla_put_failure;

	if (ops) {
		if (nla_put_string(skb, TCA_KIND, ops->kind))
			goto nla_put_failure;
		if (ops->tmplt_dump(skb, net, priv) < 0)
			goto nla_put_failure;
	}

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct tcf_block *block = chain->block;
	struct net *net = block->net;
	struct sk_buff *skb;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
			       chain->index, net, skb, block, portid,
			       seq, flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
2714 2715 2716 2717
		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
	else
		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				     flags & NLM_F_ECHO);

	if (err > 0)
		err = 0;
	return err;
}

static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
				  void *tmplt_priv, u32 chain_index,
				  struct tcf_block *block, struct sk_buff *oskb,
				  u32 seq, u16 flags, bool unicast)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct net *net = block->net;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
			       block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}

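/* Parse and attach an optional filter template (TCA_KIND plus per-classifier
 * attributes) to a chain created via RTM_NEWCHAIN.
 */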
static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
			      struct nlattr **tca,
			      struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;
	char name[IFNAMSIZ];
	void *tmplt_priv;

	/* If kind is not set, user did not specify template. */
	if (!tca[TCA_KIND])
		return 0;

	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
		NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
		return -EINVAL;
	}

	ops = tcf_proto_lookup_ops(name, true, extack);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
		return -EOPNOTSUPP;
	}

	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
	if (IS_ERR(tmplt_priv)) {
		module_put(ops->owner);
		return PTR_ERR(tmplt_priv);
	}
	chain->tmplt_ops = ops;
	chain->tmplt_priv = tmplt_priv;
	return 0;
}

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
			       void *tmplt_priv)
{
	/* If template ops are not set, there is no work to do for us. */
	if (!tmplt_ops)
		return;

	tmplt_ops->tmplt_destroy(tmplt_priv);
	module_put(tmplt_ops->owner);
}

/* Add/delete/get a chain */

static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	unsigned long cl;
	int err;

	if (n->nlmsg_type != RTM_GETCHAIN &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	parent = t->tcm_parent;
	cl = 0;

	block = tcf_block_find(net, &q, &parent, &cl,
			       t->tcm_ifindex, t->tcm_block_index, extack);
	if (IS_ERR(block))
		return PTR_ERR(block);

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout_block;
	}

	mutex_lock(&block->lock);
	chain = tcf_chain_lookup(block, chain_index);
	if (n->nlmsg_type == RTM_NEWCHAIN) {
		if (chain) {
			if (tcf_chain_held_by_acts_only(chain)) {
				/* The chain exists only because there is
				 * some action referencing it.
				 */
				tcf_chain_hold(chain);
			} else {
				NL_SET_ERR_MSG(extack, "Filter chain already exists");
				err = -EEXIST;
				goto errout_block_locked;
			}
		} else {
			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
				err = -ENOENT;
				goto errout_block_locked;
			}
			chain = tcf_chain_create(block, chain_index);
			if (!chain) {
				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
				err = -ENOMEM;
				goto errout_block_locked;
			}
		}
	} else {
		if (!chain || tcf_chain_held_by_acts_only(chain)) {
			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
			err = -EINVAL;
			goto errout_block_locked;
		}
		tcf_chain_hold(chain);
	}

	if (n->nlmsg_type == RTM_NEWCHAIN) {
		/* Modifying chain requires holding parent block lock. In case
		 * the chain was successfully added, take a reference to the
		 * chain. This ensures that an empty chain does not disappear at
		 * the end of this function.
		 */
		tcf_chain_hold(chain);
		chain->explicitly_created = true;
	}
	mutex_unlock(&block->lock);

	switch (n->nlmsg_type) {
	case RTM_NEWCHAIN:
		err = tc_chain_tmplt_add(chain, net, tca, extack);
		if (err) {
			tcf_chain_put_explicitly_created(chain);
			goto errout;
		}

		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);
		break;
	case RTM_DELCHAIN:
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER);
		/* Flush the chain first as the user requested chain removal. */
		tcf_chain_flush(chain, true);
		/* In case the chain was successfully deleted, put a reference
		 * to the chain previously taken during addition.
		 */
		tcf_chain_put_explicitly_created(chain);
		break;
	case RTM_GETCHAIN:
		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
				      n->nlmsg_flags, n->nlmsg_type, true);
		if (err < 0)
			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
		break;
	default:
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(extack, "Unsupported message type");
		goto errout;
	}

errout:
	tcf_chain_put(chain);
errout_block:
	tcf_block_release(q, block, true);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;

errout_block_locked:
	mutex_unlock(&block->lock);
	goto errout_block;
}

/* called with RTNL */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_chain *chain;
	long index_start;
	long index;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
		if (!block)
			goto out;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		if (!tcm->tcm_parent)
			q = dev->qdisc;
		else
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));

		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	mutex_lock(&block->lock);
	list_for_each_entry(chain, &block->chain_list, list) {
		if ((tca[TCA_CHAIN] &&
		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
			continue;
		if (index < index_start) {
			index++;
			continue;
		}
		if (tcf_chain_held_by_acts_only(chain))
			continue;
		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
					 chain->index, net, skb, block,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 RTM_NEWCHAIN);
		if (err <= 0)
			break;
		index++;
	}
	mutex_unlock(&block->lock);

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
		tcf_block_refcnt_put(block, true);
	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}

void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	if (exts->actions) {
		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
		kfree(exts->actions);
	}
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);

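/* Parse the action (or legacy police) attributes supplied with a filter and
 * bind the resulting actions to @exts.
 */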
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
		      bool rtnl_held, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		int init_res[TCA_ACT_MAX_PRIO] = {};
		struct tc_action *act;
		size_t attr_size = 0;

		if (exts->police && tb[exts->police]) {
			struct tc_action_ops *a_o;

			a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
			if (IS_ERR(a_o))
				return PTR_ERR(a_o);
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND, a_o, init_res,
						rtnl_held, extack);
			module_put(a_o->owner);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
			tcf_idr_insert_many(exts->actions);
		} else if (exts->action && tb[exts->action]) {
			int err;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      exts->actions, init_res,
					      &attr_size, rtnl_held, extack);
			if (err < 0)
				return err;
			exts->nr_actions = err;
		}
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police])) {
		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
		return -EOPNOTSUPP;
	}
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);

void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);

#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif

int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			nest = nla_nest_start_noflag(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			if (tcf_action_dump(skb, exts->actions, 0, 0, false)
			    < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start_noflag(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);

int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (!exts->action || !tcf_exts_has_actions(exts))
		return 0;

	nest = nla_nest_start_noflag(skb, exts->action);
	if (!nest)
		goto nla_put_failure;

	if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_terse_dump);

int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);

static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
{
	if (*flags & TCA_CLS_FLAGS_IN_HW)
		return;
	*flags |= TCA_CLS_FLAGS_IN_HW;
	atomic_inc(&block->offloadcnt);
}

static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
{
	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
		return;
	*flags &= ~TCA_CLS_FLAGS_IN_HW;
	atomic_dec(&block->offloadcnt);
}

static void tc_cls_offload_cnt_update(struct tcf_block *block,
				      struct tcf_proto *tp, u32 *cnt,
				      u32 *flags, u32 diff, bool add)
{
	lockdep_assert_held(&block->cb_lock);

	spin_lock(&tp->lock);
	if (add) {
		if (!*cnt)
			tcf_block_offload_inc(block, flags);
		*cnt += diff;
	} else {
		*cnt -= diff;
		if (!*cnt)
			tcf_block_offload_dec(block, flags);
	}
	spin_unlock(&tp->lock);
}

static void
tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
			 u32 *cnt, u32 *flags)
{
	lockdep_assert_held(&block->cb_lock);

	spin_lock(&tp->lock);
	tcf_block_offload_dec(block, flags);
	*cnt = 0;
	spin_unlock(&tp->lock);
}

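/* Invoke every callback registered on the block; returns the number of
 * callbacks that succeeded, or an error if err_stop is set and one fails.
 */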
static int
__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
		   void *type_data, bool err_stop)
{
	struct flow_block_cb *block_cb;
	int ok_count = 0;
	int err;

	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
		err = block_cb->cb(type, type_data, block_cb->cb_priv);
		if (err) {
			if (err_stop)
				return err;
		} else {
			ok_count++;
		}
	}
	return ok_count;
}

int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
		     void *type_data, bool err_stop, bool rtnl_held)
{
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);

	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);

/* Non-destructive filter add. If filter that wasn't already in hardware is
 * successfully offloaded, increment block offloads counter. On failure,
 * previously offloaded filter is considered to be intact and offloads counter
 * is not decremented.
 */

int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
		    enum tc_setup_type type, void *type_data, bool err_stop,
		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
{
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop) {
		ok_count = -EOPNOTSUPP;
		goto err_unlock;
	}

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
	if (ok_count < 0)
		goto err_unlock;

	if (tp->ops->hw_add)
		tp->ops->hw_add(tp, type_data);
	if (ok_count > 0)
		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
					  ok_count, true);
err_unlock:
	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	return ok_count < 0 ? ok_count : 0;
}
EXPORT_SYMBOL(tc_setup_cb_add);

/* Destructive filter replace. If filter that wasn't already in hardware is
 * successfully offloaded, increment block offload counter. On failure,
 * previously offloaded filter is considered to be destroyed and offload counter
 * is decremented.
 */

int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
			enum tc_setup_type type, void *type_data, bool err_stop,
			u32 *old_flags, unsigned int *old_in_hw_count,
			u32 *new_flags, unsigned int *new_in_hw_count,
			bool rtnl_held)
{
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop) {
		ok_count = -EOPNOTSUPP;
		goto err_unlock;
	}

	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
	if (tp->ops->hw_del)
		tp->ops->hw_del(tp, type_data);

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
	if (ok_count < 0)
		goto err_unlock;

	if (tp->ops->hw_add)
		tp->ops->hw_add(tp, type_data);
	if (ok_count > 0)
		tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
					  new_flags, ok_count, true);
err_unlock:
	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	return ok_count < 0 ? ok_count : 0;
}
EXPORT_SYMBOL(tc_setup_cb_replace);

/* Destroy filter and decrement block offload counter, if filter was previously
 * offloaded.
 */

int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
			enum tc_setup_type type, void *type_data, bool err_stop,
			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
{
	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
	int ok_count;

retry:
	if (take_rtnl)
		rtnl_lock();
	down_read(&block->cb_lock);
	/* Need to obtain rtnl lock if block is bound to devs that require it.
	 * In block bind code cb_lock is obtained while holding rtnl, so we must
	 * obtain the locks in same order here.
	 */
	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
		up_read(&block->cb_lock);
		take_rtnl = true;
		goto retry;
	}

	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);

	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
	if (tp->ops->hw_del)
		tp->ops->hw_del(tp, type_data);

	up_read(&block->cb_lock);
	if (take_rtnl)
		rtnl_unlock();
	return ok_count < 0 ? ok_count : 0;
}
EXPORT_SYMBOL(tc_setup_cb_destroy);

int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
			  bool add, flow_setup_cb_t *cb,
			  enum tc_setup_type type, void *type_data,
			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
{
	int err = cb(type, type_data, cb_priv);

	if (err) {
		if (add && tc_skip_sw(*flags))
			return err;
	} else {
		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
					  add);
	}

	return 0;
}
EXPORT_SYMBOL(tc_setup_cb_reoffload);

static int tcf_act_get_cookie(struct flow_action_entry *entry,
			      const struct tc_action *act)
{
	struct tc_cookie *cookie;
	int err = 0;

	rcu_read_lock();
	cookie = rcu_dereference(act->act_cookie);
	if (cookie) {
		entry->cookie = flow_action_cookie_create(cookie->data,
							  cookie->len,
							  GFP_ATOMIC);
		if (!entry->cookie)
			err = -ENOMEM;
	}
	rcu_read_unlock();
	return err;
}

static void tcf_act_put_cookie(struct flow_action_entry *entry)
{
	flow_action_cookie_destroy(entry->cookie);
}

void tc_cleanup_flow_action(struct flow_action *flow_action)
{
	struct flow_action_entry *entry;
	int i;

	flow_action_for_each(i, entry, flow_action) {
		tcf_act_put_cookie(entry);
		if (entry->destructor)
			entry->destructor(entry->destructor_priv);
	}
}
EXPORT_SYMBOL(tc_cleanup_flow_action);

static void tcf_mirred_get_dev(struct flow_action_entry *entry,
			       const struct tc_action *act)
{
#ifdef CONFIG_NET_CLS_ACT
	entry->dev = act->ops->get_dev(act, &entry->destructor);
	if (!entry->dev)
		return;
	entry->destructor_priv = entry->dev;
#endif
}

static void tcf_tunnel_encap_put_tunnel(void *priv)
{
	struct ip_tunnel_info *tunnel = priv;

	kfree(tunnel);
}

static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
				       const struct tc_action *act)
{
	entry->tunnel = tcf_tunnel_info_copy(act);
	if (!entry->tunnel)
		return -ENOMEM;
	entry->destructor = tcf_tunnel_encap_put_tunnel;
	entry->destructor_priv = entry->tunnel;
	return 0;
}

static void tcf_sample_get_group(struct flow_action_entry *entry,
				 const struct tc_action *act)
{
#ifdef CONFIG_NET_CLS_ACT
	entry->sample.psample_group =
		act->ops->get_psample_group(act, &entry->destructor);
	entry->destructor_priv = entry->sample.psample_group;
#endif
}

static void tcf_gate_entry_destructor(void *priv)
{
	struct action_gate_entry *oe = priv;

	kfree(oe);
}

static int tcf_gate_get_entries(struct flow_action_entry *entry,
				const struct tc_action *act)
{
	entry->gate.entries = tcf_gate_get_list(act);

	if (!entry->gate.entries)
		return -EINVAL;

	entry->destructor = tcf_gate_entry_destructor;
	entry->destructor_priv = entry->gate.entries;

	return 0;
}

static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
{
	if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
		return FLOW_ACTION_HW_STATS_DONT_CARE;
	else if (!hw_stats)
		return FLOW_ACTION_HW_STATS_DISABLED;

	return hw_stats;
}

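/* Translate the tc actions attached to @exts into flow_action entries that
 * drivers consume for hardware offload.
 */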
int tc_setup_flow_action(struct flow_action *flow_action,
			 const struct tcf_exts *exts)
{
	struct tc_action *act;
	int i, j, k, err = 0;

	BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
	BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
	BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);

	if (!exts)
		return 0;

	j = 0;
	tcf_exts_for_each_action(i, act, exts) {
		struct flow_action_entry *entry;

		entry = &flow_action->entries[j];
		spin_lock_bh(&act->tcfa_lock);
		err = tcf_act_get_cookie(entry, act);
		if (err)
			goto err_out_locked;

		entry->hw_stats = tc_act_hw_stats(act->hw_stats);

		if (is_tcf_gact_ok(act)) {
			entry->id = FLOW_ACTION_ACCEPT;
		} else if (is_tcf_gact_shot(act)) {
			entry->id = FLOW_ACTION_DROP;
		} else if (is_tcf_gact_trap(act)) {
			entry->id = FLOW_ACTION_TRAP;
		} else if (is_tcf_gact_goto_chain(act)) {
			entry->id = FLOW_ACTION_GOTO;
			entry->chain_index = tcf_gact_goto_chain_index(act);
		} else if (is_tcf_mirred_egress_redirect(act)) {
			entry->id = FLOW_ACTION_REDIRECT;
			tcf_mirred_get_dev(entry, act);
		} else if (is_tcf_mirred_egress_mirror(act)) {
			entry->id = FLOW_ACTION_MIRRED;
			tcf_mirred_get_dev(entry, act);
		} else if (is_tcf_mirred_ingress_redirect(act)) {
			entry->id = FLOW_ACTION_REDIRECT_INGRESS;
			tcf_mirred_get_dev(entry, act);
		} else if (is_tcf_mirred_ingress_mirror(act)) {
			entry->id = FLOW_ACTION_MIRRED_INGRESS;
			tcf_mirred_get_dev(entry, act);
		} else if (is_tcf_vlan(act)) {
			switch (tcf_vlan_action(act)) {
			case TCA_VLAN_ACT_PUSH:
				entry->id = FLOW_ACTION_VLAN_PUSH;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			case TCA_VLAN_ACT_POP:
				entry->id = FLOW_ACTION_VLAN_POP;
				break;
			case TCA_VLAN_ACT_MODIFY:
				entry->id = FLOW_ACTION_VLAN_MANGLE;
				entry->vlan.vid = tcf_vlan_push_vid(act);
				entry->vlan.proto = tcf_vlan_push_proto(act);
				entry->vlan.prio = tcf_vlan_push_prio(act);
				break;
			default:
				err = -EOPNOTSUPP;
				goto err_out_locked;
			}
		} else if (is_tcf_tunnel_set(act)) {
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			err = tcf_tunnel_encap_get_tunnel(entry, act);
			if (err)
				goto err_out_locked;
		} else if (is_tcf_tunnel_release(act)) {
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		} else if (is_tcf_pedit(act)) {
			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
				switch (tcf_pedit_cmd(act, k)) {
				case TCA_PEDIT_KEY_EX_CMD_SET:
					entry->id = FLOW_ACTION_MANGLE;
					break;
				case TCA_PEDIT_KEY_EX_CMD_ADD:
					entry->id = FLOW_ACTION_ADD;
					break;
				default:
					err = -EOPNOTSUPP;
					goto err_out_locked;
				}
				entry->mangle.htype = tcf_pedit_htype(act, k);
				entry->mangle.mask = tcf_pedit_mask(act, k);
				entry->mangle.val = tcf_pedit_val(act, k);
				entry->mangle.offset = tcf_pedit_offset(act, k);
				entry->hw_stats = tc_act_hw_stats(act->hw_stats);
				entry = &flow_action->entries[++j];
			}
		} else if (is_tcf_csum(act)) {
			entry->id = FLOW_ACTION_CSUM;
			entry->csum_flags = tcf_csum_update_flags(act);
		} else if (is_tcf_skbedit_mark(act)) {
			entry->id = FLOW_ACTION_MARK;
			entry->mark = tcf_skbedit_mark(act);
		} else if (is_tcf_sample(act)) {
			entry->id = FLOW_ACTION_SAMPLE;
			entry->sample.trunc_size = tcf_sample_trunc_size(act);
			entry->sample.truncate = tcf_sample_truncate(act);
			entry->sample.rate = tcf_sample_rate(act);
			tcf_sample_get_group(entry, act);
		} else if (is_tcf_police(act)) {
			entry->id = FLOW_ACTION_POLICE;
			entry->police.burst = tcf_police_burst(act);
			entry->police.rate_bytes_ps =
				tcf_police_rate_bytes_ps(act);
			entry->police.burst_pkt = tcf_police_burst_pkt(act);
			entry->police.rate_pkt_ps =
				tcf_police_rate_pkt_ps(act);
			entry->police.mtu = tcf_police_tcfp_mtu(act);
			entry->police.index = act->tcfa_index;
		} else if (is_tcf_ct(act)) {
			entry->id = FLOW_ACTION_CT;
			entry->ct.action = tcf_ct_action(act);
			entry->ct.zone = tcf_ct_zone(act);
			entry->ct.flow_table = tcf_ct_ft(act);
		} else if (is_tcf_mpls(act)) {
			switch (tcf_mpls_action(act)) {
			case TCA_MPLS_ACT_PUSH:
				entry->id = FLOW_ACTION_MPLS_PUSH;
				entry->mpls_push.proto = tcf_mpls_proto(act);
				entry->mpls_push.label = tcf_mpls_label(act);
				entry->mpls_push.tc = tcf_mpls_tc(act);
				entry->mpls_push.bos = tcf_mpls_bos(act);
				entry->mpls_push.ttl = tcf_mpls_ttl(act);
				break;
			case TCA_MPLS_ACT_POP:
				entry->id = FLOW_ACTION_MPLS_POP;
				entry->mpls_pop.proto = tcf_mpls_proto(act);
				break;
			case TCA_MPLS_ACT_MODIFY:
				entry->id = FLOW_ACTION_MPLS_MANGLE;
				entry->mpls_mangle.label = tcf_mpls_label(act);
				entry->mpls_mangle.tc = tcf_mpls_tc(act);
				entry->mpls_mangle.bos = tcf_mpls_bos(act);
				entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
				break;
			default:
				err = -EOPNOTSUPP;
				goto err_out_locked;
			}
		} else if (is_tcf_skbedit_ptype(act)) {
			entry->id = FLOW_ACTION_PTYPE;
			entry->ptype = tcf_skbedit_ptype(act);
		} else if (is_tcf_skbedit_priority(act)) {
			entry->id = FLOW_ACTION_PRIORITY;
			entry->priority = tcf_skbedit_priority(act);
		} else if (is_tcf_gate(act)) {
			entry->id = FLOW_ACTION_GATE;
			entry->gate.index = tcf_gate_index(act);
			entry->gate.prio = tcf_gate_prio(act);
			entry->gate.basetime = tcf_gate_basetime(act);
			entry->gate.cycletime = tcf_gate_cycletime(act);
			entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
			entry->gate.num_entries = tcf_gate_num_entries(act);
			err = tcf_gate_get_entries(entry, act);
			if (err)
				goto err_out_locked;
		} else {
			err = -EOPNOTSUPP;
			goto err_out_locked;
		}
		spin_unlock_bh(&act->tcfa_lock);

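		/* The pedit branch already advanced j once per key, so only
		 * bump it here for single-entry actions.
		 */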
		if (!is_tcf_pedit(act))
			j++;
	}

err_out:
	if (err)
		tc_cleanup_flow_action(flow_action);

	return err;
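/* Jumped to with act->tcfa_lock held: release the lock before taking the
 * common cleanup path above.
 */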
err_out_locked:
	spin_unlock_bh(&act->tcfa_lock);
	goto err_out;
}
EXPORT_SYMBOL(tc_setup_flow_action);

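/* Return the number of flow_action entries needed to translate @exts:
 * one per action, except pedit which needs one entry per key.
 */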
unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
{
	unsigned int num_acts = 0;
	struct tc_action *act;
	int i;

	tcf_exts_for_each_action(i, act, exts) {
		if (is_tcf_pedit(act))
			num_acts += tcf_pedit_nkeys(act);
		else
			num_acts++;
	}
	return num_acts;
}
EXPORT_SYMBOL(tcf_exts_num_actions);

#ifdef CONFIG_NET_CLS_ACT
static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
					u32 *p_block_index,
					struct netlink_ext_ack *extack)
{
	*p_block_index = nla_get_u32(block_index_attr);
	if (!*p_block_index) {
		NL_SET_ERR_MSG(extack, "Block number may not be zero");
		return -EINVAL;
	}

	return 0;
}

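/* Bind an optional qevent block to @sch. With no (or a zero) block index
 * attribute the qevent stays unbound and tcf_qevent_handle() passes skbs
 * through untouched.
 */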
int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
		    enum flow_block_binder_type binder_type,
		    struct nlattr *block_index_attr,
		    struct netlink_ext_ack *extack)
{
	u32 block_index;
	int err;

	if (!block_index_attr)
		return 0;

	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
	if (err)
		return err;

	if (!block_index)
		return 0;

	qe->info.binder_type = binder_type;
	qe->info.chain_head_change = tcf_chain_head_change_dflt;
	qe->info.chain_head_change_priv = &qe->filter_chain;
	qe->info.block_index = block_index;

	return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
}
EXPORT_SYMBOL(tcf_qevent_init);

void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
{
	if (qe->info.block_index)
		tcf_block_put_ext(qe->block, sch, &qe->info);
}
EXPORT_SYMBOL(tcf_qevent_destroy);

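/* Qevents cannot be rebound to a different block after init; only the
 * originally configured block index is accepted on change.
 */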
int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
			       struct netlink_ext_ack *extack)
{
	u32 block_index;
	int err;

	if (!block_index_attr)
		return 0;

	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
	if (err)
		return err;

	/* Bounce newly-configured block or change in block. */
	if (block_index != qe->info.block_index) {
		NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
		return -EINVAL;
	}

	return 0;
}
EXPORT_SYMBOL(tcf_qevent_validate_change);

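/* Classify @skb against the qevent's filter chain and map the verdict to a
 * qdisc return code. Returns NULL when the skb was consumed (dropped,
 * stolen or redirected), in which case *ret holds the code to propagate.
 */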
struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
				  struct sk_buff **to_free, int *ret)
{
	struct tcf_result cl_res;
	struct tcf_proto *fl;

	if (!qe->info.block_index)
		return skb;

	fl = rcu_dereference_bh(qe->filter_chain);

	switch (tcf_classify(skb, fl, &cl_res, false)) {
	case TC_ACT_SHOT:
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		*ret = __NET_XMIT_BYPASS;
		return NULL;
	case TC_ACT_STOLEN:
	case TC_ACT_QUEUED:
	case TC_ACT_TRAP:
		__qdisc_drop(skb, to_free);
		*ret = __NET_XMIT_STOLEN;
		return NULL;
	case TC_ACT_REDIRECT:
		skb_do_redirect(skb);
		*ret = __NET_XMIT_STOLEN;
		return NULL;
	}

	return skb;
}
EXPORT_SYMBOL(tcf_qevent_handle);

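/* Dump the bound block index as @attr_name, or nothing if no block is bound. */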
int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
{
	if (!qe->info.block_index)
		return 0;
	return nla_put_u32(skb, attr_name, qe->info.block_index);
}
EXPORT_SYMBOL(tcf_qevent_dump);
#endif

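/* Per-netns state: an IDR mapping shared block indexes to their tcf_block
 * instances, protected by idr_lock.
 */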
static __net_init int tcf_net_init(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	spin_lock_init(&tn->idr_lock);
	idr_init(&tn->idr);
	return 0;
}

static void __net_exit tcf_net_exit(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	idr_destroy(&tn->idr);
}

static struct pernet_operations tcf_net_ops = {
	.init = tcf_net_init,
	.exit = tcf_net_exit,
	.id   = &tcf_net_id,
	.size = sizeof(struct tcf_net),
};

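/* Module init: create the ordered workqueue used for deferred filter
 * destruction, register per-netns state and hook the filter and chain
 * commands into rtnetlink.
 */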
static int __init tc_filter_init(void)
{
	int err;

	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	err = register_pernet_subsys(&tcf_net_ops);
	if (err)
		goto err_register_pernet_subsys;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
		      tc_dump_chain, 0);

	return 0;

err_register_pernet_subsys:
	destroy_workqueue(tc_filter_wq);
	return err;
}

subsys_initcall(tc_filter_init);