1. 20 Feb, 2024 5 commits
    • Eric Dumazet's avatar
      net: reorganize "struct sock" fields · 5d4cc874
      Eric Dumazet authored
      Last major reorg happened in commit 9115e8cd ("net: reorganize
      struct sock for better data locality")
      
      Since then, many changes have been done.
      
      Before SO_PEEK_OFF support is added to TCP, we need
      to move sk_peek_off to a better location.
      
      It is time to make another pass, and add six groups,
      without explicit alignment.
      
      - sock_write_rx (following sk_refcnt) read-write fields in rx path.
      - sock_read_rx read-mostly fields in rx path.
      - sock_read_rxtx read-mostly fields in both rx and tx paths.
      - sock_write_rxtx read-write fields in both rx and tx paths.
      - sock_write_tx read-write fields in tx paths.
      - sock_read_tx read-mostly fields in tx paths.
      
      Results on TCP_RR benchmarks seem to show a gain (4 to 5 %).
      
      It is possible UDP needs a change, because sk_peek_off
      shares a cache line with sk_receive_queue.
      If this is the case, we can exchange the roles of the
      sk->sk_receive_queue and up->reader_queue queues.
      
      After this change, we have the following layout:
      
      struct sock {
      	struct sock_common         __sk_common;          /*     0  0x88 */
      	/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
      	__u8                       __cacheline_group_begin__sock_write_rx[0]; /*  0x88     0 */
      	atomic_t                   sk_drops;             /*  0x88   0x4 */
      	__s32                      sk_peek_off;          /*  0x8c   0x4 */
      	struct sk_buff_head        sk_error_queue;       /*  0x90  0x18 */
      	struct sk_buff_head        sk_receive_queue;     /*  0xa8  0x18 */
      	/* --- cacheline 3 boundary (192 bytes) --- */
      	struct {
      		atomic_t           rmem_alloc;           /*  0xc0   0x4 */
      		int                len;                  /*  0xc4   0x4 */
      		struct sk_buff *   head;                 /*  0xc8   0x8 */
      		struct sk_buff *   tail;                 /*  0xd0   0x8 */
      	} sk_backlog;                                    /*  0xc0  0x18 */
      	struct {
      		atomic_t                   rmem_alloc;           /*     0   0x4 */
      		int                        len;                  /*   0x4   0x4 */
      		struct sk_buff *           head;                 /*   0x8   0x8 */
      		struct sk_buff *           tail;                 /*  0x10   0x8 */
      
      		/* size: 24, cachelines: 1, members: 4 */
      		/* last cacheline: 24 bytes */
      	};
      
      	__u8                       __cacheline_group_end__sock_write_rx[0]; /*  0xd8     0 */
      	__u8                       __cacheline_group_begin__sock_read_rx[0]; /*  0xd8     0 */
      	rcu *                      sk_rx_dst;            /*  0xd8   0x8 */
      	int                        sk_rx_dst_ifindex;    /*  0xe0   0x4 */
      	u32                        sk_rx_dst_cookie;     /*  0xe4   0x4 */
      	unsigned int               sk_ll_usec;           /*  0xe8   0x4 */
      	unsigned int               sk_napi_id;           /*  0xec   0x4 */
      	u16                        sk_busy_poll_budget;  /*  0xf0   0x2 */
      	u8                         sk_prefer_busy_poll;  /*  0xf2   0x1 */
      	u8                         sk_userlocks;         /*  0xf3   0x1 */
      	int                        sk_rcvbuf;            /*  0xf4   0x4 */
      	rcu *                      sk_filter;            /*  0xf8   0x8 */
      	/* --- cacheline 4 boundary (256 bytes) --- */
      	union {
      		rcu *              sk_wq;                /* 0x100   0x8 */
      		struct socket_wq * sk_wq_raw;            /* 0x100   0x8 */
      	};                                               /* 0x100   0x8 */
      	union {
      		rcu *                      sk_wq;                /*     0   0x8 */
      		struct socket_wq *         sk_wq_raw;            /*     0   0x8 */
      	};
      
      	void                       (*sk_data_ready)(struct sock *); /* 0x108   0x8 */
      	long                       sk_rcvtimeo;          /* 0x110   0x8 */
      	int                        sk_rcvlowat;          /* 0x118   0x4 */
      	__u8                       __cacheline_group_end__sock_read_rx[0]; /* 0x11c     0 */
      	__u8                       __cacheline_group_begin__sock_read_rxtx[0]; /* 0x11c     0 */
      	int                        sk_err;               /* 0x11c   0x4 */
      	struct socket *            sk_socket;            /* 0x120   0x8 */
      	struct mem_cgroup *        sk_memcg;             /* 0x128   0x8 */
      	rcu *                      sk_policy[2];         /* 0x130  0x10 */
      	/* --- cacheline 5 boundary (320 bytes) --- */
      	__u8                       __cacheline_group_end__sock_read_rxtx[0]; /* 0x140     0 */
      	__u8                       __cacheline_group_begin__sock_write_rxtx[0]; /* 0x140     0 */
      	socket_lock_t              sk_lock;              /* 0x140  0x20 */
      	u32                        sk_reserved_mem;      /* 0x160   0x4 */
      	int                        sk_forward_alloc;     /* 0x164   0x4 */
      	u32                        sk_tsflags;           /* 0x168   0x4 */
      	__u8                       __cacheline_group_end__sock_write_rxtx[0]; /* 0x16c     0 */
      	__u8                       __cacheline_group_begin__sock_write_tx[0]; /* 0x16c     0 */
      	int                        sk_write_pending;     /* 0x16c   0x4 */
      	atomic_t                   sk_omem_alloc;        /* 0x170   0x4 */
      	int                        sk_sndbuf;            /* 0x174   0x4 */
      	int                        sk_wmem_queued;       /* 0x178   0x4 */
      	refcount_t                 sk_wmem_alloc;        /* 0x17c   0x4 */
      	/* --- cacheline 6 boundary (384 bytes) --- */
      	unsigned long              sk_tsq_flags;         /* 0x180   0x8 */
      	union {
      		struct sk_buff *   sk_send_head;         /* 0x188   0x8 */
      		struct rb_root     tcp_rtx_queue;        /* 0x188   0x8 */
      	};                                               /* 0x188   0x8 */
      	union {
      		struct sk_buff *           sk_send_head;         /*     0   0x8 */
      		struct rb_root             tcp_rtx_queue;        /*     0   0x8 */
      	};
      
      	struct sk_buff_head        sk_write_queue;       /* 0x190  0x18 */
      	u32                        sk_dst_pending_confirm; /* 0x1a8   0x4 */
      	u32                        sk_pacing_status;     /* 0x1ac   0x4 */
      	struct page_frag           sk_frag;              /* 0x1b0  0x10 */
      	/* --- cacheline 7 boundary (448 bytes) --- */
      	struct timer_list          sk_timer;             /* 0x1c0  0x28 */
      
      	/* XXX last struct has 4 bytes of padding */
      
      	unsigned long              sk_pacing_rate;       /* 0x1e8   0x8 */
      	atomic_t                   sk_zckey;             /* 0x1f0   0x4 */
      	atomic_t                   sk_tskey;             /* 0x1f4   0x4 */
      	__u8                       __cacheline_group_end__sock_write_tx[0]; /* 0x1f8     0 */
      	__u8                       __cacheline_group_begin__sock_read_tx[0]; /* 0x1f8     0 */
      	unsigned long              sk_max_pacing_rate;   /* 0x1f8   0x8 */
      	/* --- cacheline 8 boundary (512 bytes) --- */
      	long                       sk_sndtimeo;          /* 0x200   0x8 */
      	u32                        sk_priority;          /* 0x208   0x4 */
      	u32                        sk_mark;              /* 0x20c   0x4 */
      	rcu *                      sk_dst_cache;         /* 0x210   0x8 */
      	netdev_features_t          sk_route_caps;        /* 0x218   0x8 */
      	u16                        sk_gso_type;          /* 0x220   0x2 */
      	u16                        sk_gso_max_segs;      /* 0x222   0x2 */
      	unsigned int               sk_gso_max_size;      /* 0x224   0x4 */
      	gfp_t                      sk_allocation;        /* 0x228   0x4 */
      	u32                        sk_txhash;            /* 0x22c   0x4 */
      	u8                         sk_pacing_shift;      /* 0x230   0x1 */
      	bool                       sk_use_task_frag;     /* 0x231   0x1 */
      	__u8                       __cacheline_group_end__sock_read_tx[0]; /* 0x232     0 */
      	u8                         sk_gso_disabled:1;    /* 0x232: 0 0x1 */
      	u8                         sk_kern_sock:1;       /* 0x232:0x1 0x1 */
      	u8                         sk_no_check_tx:1;     /* 0x232:0x2 0x1 */
      	u8                         sk_no_check_rx:1;     /* 0x232:0x3 0x1 */
      
      	/* XXX 4 bits hole, try to pack */
      
      	u8                         sk_shutdown;          /* 0x233   0x1 */
      	u16                        sk_type;              /* 0x234   0x2 */
      	u16                        sk_protocol;          /* 0x236   0x2 */
      	unsigned long              sk_lingertime;        /* 0x238   0x8 */
      	/* --- cacheline 9 boundary (576 bytes) --- */
      	struct proto *             sk_prot_creator;      /* 0x240   0x8 */
      	rwlock_t                   sk_callback_lock;     /* 0x248   0x8 */
      	int                        sk_err_soft;          /* 0x250   0x4 */
      	u32                        sk_ack_backlog;       /* 0x254   0x4 */
      	u32                        sk_max_ack_backlog;   /* 0x258   0x4 */
      	kuid_t                     sk_uid;               /* 0x25c   0x4 */
      	spinlock_t                 sk_peer_lock;         /* 0x260   0x4 */
      	int                        sk_bind_phc;          /* 0x264   0x4 */
      	struct pid *               sk_peer_pid;          /* 0x268   0x8 */
      	const struct cred  *       sk_peer_cred;         /* 0x270   0x8 */
      	ktime_t                    sk_stamp;             /* 0x278   0x8 */
      	/* --- cacheline 10 boundary (640 bytes) --- */
      	int                        sk_disconnects;       /* 0x280   0x4 */
      	u8                         sk_txrehash;          /* 0x284   0x1 */
      	u8                         sk_clockid;           /* 0x285   0x1 */
      	u8                         sk_txtime_deadline_mode:1; /* 0x286: 0 0x1 */
      	u8                         sk_txtime_report_errors:1; /* 0x286:0x1 0x1 */
      	u8                         sk_txtime_unused:6;   /* 0x286:0x2 0x1 */
      
      	/* XXX 1 byte hole, try to pack */
      
      	void *                     sk_user_data;         /* 0x288   0x8 */
      	void *                     sk_security;          /* 0x290   0x8 */
      	struct sock_cgroup_data    sk_cgrp_data;         /* 0x298   0x8 */
      	void                       (*sk_state_change)(struct sock *); /* 0x2a0   0x8 */
      	void                       (*sk_write_space)(struct sock *); /* 0x2a8   0x8 */
      	void                       (*sk_error_report)(struct sock *); /* 0x2b0   0x8 */
      	int                        (*sk_backlog_rcv)(struct sock *, struct sk_buff *); /* 0x2b8   0x8 */
      	/* --- cacheline 11 boundary (704 bytes) --- */
      	void                       (*sk_destruct)(struct sock *); /* 0x2c0   0x8 */
      	rcu *                      sk_reuseport_cb;      /* 0x2c8   0x8 */
      	rcu *                      sk_bpf_storage;       /* 0x2d0   0x8 */
      	struct callback_head       sk_rcu __attribute__((__aligned__(8))); /* 0x2d8  0x10 */
      	netns_tracker              ns_tracker;           /* 0x2e8   0x8 */
      
      	/* size: 752, cachelines: 12, members: 105 */
      	/* sum members: 749, holes: 1, sum holes: 1 */
      	/* sum bitfield members: 12 bits, bit holes: 1, sum bit holes: 4 bits */
      	/* paddings: 1, sum paddings: 4 */
      	/* forced alignments: 1 */
      	/* last cacheline: 48 bytes */
      };
      Signed-off-by: Eric Dumazet <edumazet@google.com>
      Acked-by: Paolo Abeni <pabeni@redhat.com>
      Link: https://lore.kernel.org/r/20240216162006.2342759-1-edumazet@google.com
      Signed-off-by: Paolo Abeni <pabeni@redhat.com>
      5d4cc874
    • Colin Ian King's avatar
      net: tcp: Remove redundant initialization of variable len · 465c1abc
      Colin Ian King authored
      The variable len is being initialized with a value that is never read;
      an if statement assigns it in both of its branches.
      The initialization is redundant and can be removed.
      
      Cleans up clang scan build warning:
      net/ipv4/tcp_ao.c:512:11: warning: Value stored to 'len' during its
      initialization is never read [deadcode.DeadStores]
      Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
      Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com>
      Link: https://lore.kernel.org/r/20240216125443.2107244-1-colin.i.king@gmail.com
      Signed-off-by: Paolo Abeni <pabeni@redhat.com>
      465c1abc
    • Paolo Abeni's avatar
      Merge branch 'abstract-page-from-net-stack' · bb18fc7a
      Paolo Abeni authored
      Mina Almasry says:
      
      ====================
      Abstract page from net stack
      
      This series is a prerequisite to the devmem TCP series. For a full
      snapshot of the code which includes these changes, feel free to check:
      
      https://github.com/mina/linux/commits/tcpdevmem-rfcv5/
      
      Currently these components in the net stack use the struct page
      directly:
      
      1. Drivers.
      2. Page pool.
      3. skb_frag_t.
      
      To add support for new (non struct page) memory types to the net stack, we
      must first abstract the current memory type.
      
      Originally the plan was to reuse struct page* for the new memory types,
      and to set the LSB on the page* to indicate it's not really a page.
      However, for safe compiler type checking we need to introduce a new type.
      
      struct netmem is introduced to abstract the underlying memory type.
      Currently it's a no-op abstraction that is always a struct page underneath.
      In parallel there is an ongoing effort to add support for devmem to the
      net stack:
      
      https://lore.kernel.org/netdev/20231208005250.2910004-1-almasrymina@google.com/
      
      Cc: Jason Gunthorpe <jgg@nvidia.com>
      Cc: Christian König <christian.koenig@amd.com>
      Cc: Shakeel Butt <shakeelb@google.com>
      Cc: Yunsheng Lin <linyunsheng@huawei.com>
      Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
      ====================
      
      Link: https://lore.kernel.org/r/20240214223405.1972973-1-almasrymina@google.com
      Signed-off-by: Paolo Abeni <pabeni@redhat.com>
      bb18fc7a
    • Mina Almasry's avatar
      net: add netmem to skb_frag_t · 21d2e673
      Mina Almasry authored
      Use struct netmem* instead of page in skb_frag_t. Currently struct
      netmem* is always a struct page underneath, but the abstraction
      allows efforts to add support for skb frags not backed by pages.
      
      There is unfortunately 1 instance where the skb_frag_t is assumed to be
      exactly a bio_vec in kcm. For this case, WARN_ON_ONCE and return error
      before doing a cast.
      
      Add skb[_frag]_fill_netmem_*() and skb_add_rx_frag_netmem() helpers so
      that the API can be used to create netmem skbs.
      Signed-off-by: Mina Almasry <almasrymina@google.com>
      Acked-by: Paolo Abeni <pabeni@redhat.com>
      Signed-off-by: Paolo Abeni <pabeni@redhat.com>
      21d2e673
    • Mina Almasry's avatar
      net: introduce abstraction for network memory · 18ddbf5c
      Mina Almasry authored
      Add the netmem_ref type, an abstraction for network memory.
      
      To add support for new memory types to the net stack, we must first
      abstract the current memory type. Currently parts of the net stack
      use struct page directly:
      
      - page_pool
      - drivers
      - skb_frag_t
      
      Originally the plan was to reuse struct page* for the new memory types,
      and to set the LSB on the page* to indicate it's not really a page.
      However, for compiler type checking we need to introduce a new type.
      
      netmem_ref is introduced to abstract the underlying memory type.
      Currently it's a no-op abstraction that is always a struct page
      underneath. In parallel there is an ongoing effort to add support
      for devmem to the net stack:
      
      https://lore.kernel.org/netdev/20231208005250.2910004-1-almasrymina@google.com/
      
      netmem_ref can be pointers to different underlying memory types, and the
      low bits are set to indicate the memory type. Helpers are provided
      to convert netmem pointers to the underlying memory type (currently only
      struct page). In the devmem series helpers are provided so that calling
      code can use netmem without worrying about the underlying memory type
      unless absolutely necessary.
      Reviewed-by: Shakeel Butt <shakeelb@google.com>
      Signed-off-by: Mina Almasry <almasrymina@google.com>
      Signed-off-by: Paolo Abeni <pabeni@redhat.com>
      18ddbf5c
  2. 19 Feb, 2024 11 commits
  3. 18 Feb, 2024 3 commits
  4. 17 Feb, 2024 6 commits
  5. 16 Feb, 2024 15 commits
    • Ivan Vecera's avatar
      i40e: Remove VEB recursion · f09cbb6c
      Ivan Vecera authored
      The VEB (virtual embedded switch) as a switch element can be
      connected, according to the datasheet, through its uplink to:
      - Physical port
      - Port Virtualizer (not used directly by i40e driver but can
        be present in MFP mode where the physical port is shared
        between PFs)
      - No uplink (aka floating VEB)
      
      But VEB uplink cannot be connected to another VEB and any attempt
      to do so results in:
      
      "i40e 0000:02:00.0: couldn't add VEB, err -EIO aq_err I40E_AQ_RC_ENOENT"
      
      that indicates "the uplink SEID does not point to valid element".
      
      Remove this logic from the driver code this way:
      
      1) For debugfs only allow to build floating VEB (uplink_seid == 0)
         or main VEB (uplink_seid == mac_seid)
      2) Do not recurse in i40e_veb_link_event() as a VEB cannot have
         sub-VEBs
      3) Ditto for i40e_veb_rebuild() + simplify the function as we know
         that the VEB for rebuild can be only the main LAN VEB or some
         of the floating VEBs
      4) In i40e_rebuild() there is no need to check veb->uplink_seid
         as the possible ones are 0 and MAC SEID
      5) In i40e_vsi_release() do not take into account VEBs whose
         uplink is another VEB as this is not possible
      6) Remove veb_idx field from i40e_veb as a VEB cannot have
         sub-VEBs
      
      Tested using i40e debugfs interface:
      1) Initial state
      [root@cnb-03 net-next]# CMD="/sys/kernel/debug/i40e/0000:02:00.0/command"
      [root@cnb-03 net-next]# echo dump switch > $CMD
      [root@cnb-03 net-next]# dmesg -c
      [   98.440641] i40e 0000:02:00.0: header: 3 reported 3 total
      [   98.446053] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [   98.452593] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [   98.458856] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      
      2) Add floating VEB
      [root@cnb-03 net-next]# echo add relay > $CMD
      [root@cnb-03 net-next]# dmesg -c
      [  122.745630] i40e 0000:02:00.0: added relay 162
      [root@cnb-03 net-next]# echo dump switch > $CMD
      [root@cnb-03 net-next]# dmesg -c
      [  136.650049] i40e 0000:02:00.0: header: 4 reported 4 total
      [  136.655466] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [  136.661994] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [  136.668264] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      [  136.674787] i40e 0000:02:00.0: type=17 seid=162 uplink=0 downlink=0
      
      3) Add VMDQ2 VSI to this new VEB
      [root@cnb-03 net-next]# dmesg -c
      [  168.351763] i40e 0000:02:00.0: added VSI 394 to relay 162
      [  168.374652] enp2s0f0np0v0: NIC Link is Up, 40 Gbps Full Duplex, Flow Control: None
      [root@cnb-03 net-next]# echo dump switch > $CMD
      [root@cnb-03 net-next]# dmesg -c
      [  195.683204] i40e 0000:02:00.0: header: 5 reported 5 total
      [  195.688611] i40e 0000:02:00.0: type=19 seid=394 uplink=162 downlink=16
      [  195.695143] i40e 0000:02:00.0: type=17 seid=162 uplink=0 downlink=0
      [  195.701410] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [  195.707935] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [  195.714201] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      
      4) Try to delete the VEB
      [root@cnb-03 net-next]# echo del relay 162 > $CMD
      [root@cnb-03 net-next]# dmesg -c
      [  239.260901] i40e 0000:02:00.0: deleting relay 162
      [  239.265621] i40e 0000:02:00.0: can't remove VEB 162 with 1 VSIs left
      
      5) Do PF reset and check switch status after rebuild
      [root@cnb-03 net-next]# echo pfr > $CMD
      [root@cnb-03 net-next]# echo dump switch > $CMD
      [root@cnb-03 net-next]# dmesg -c
      ...
      [  272.333655] i40e 0000:02:00.0: header: 5 reported 5 total
      [  272.339066] i40e 0000:02:00.0: type=19 seid=394 uplink=162 downlink=16
      [  272.345599] i40e 0000:02:00.0: type=17 seid=162 uplink=0 downlink=0
      [  272.351862] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [  272.358387] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [  272.364654] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      
      6) Delete VSI and delete VEB
      [  297.199116] i40e 0000:02:00.0: deleting VSI 394
      [  299.807580] i40e 0000:02:00.0: deleting relay 162
      [  309.767905] i40e 0000:02:00.0: header: 3 reported 3 total
      [  309.773318] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [  309.779845] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [  309.786111] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      Reviewed-by: default avatarWojciech Drewek <wojciech.drewek@intel.com>
      Signed-off-by: default avatarIvan Vecera <ivecera@redhat.com>
      Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
      Reviewed-by: default avatarSimon Horman <horms@kernel.org>
      Signed-off-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
      f09cbb6c
    • Ivan Vecera's avatar
      i40e: Fix broken support for floating VEBs · 08cdde31
      Ivan Vecera authored
      Although the i40e supports so-called floating VEB (VEB without
      an uplink connection to external network), this support is
      broken. This functionality is currently unused (except debugfs)
      but it will be used by subsequent series for switchdev mode
      slow-path. Fix this as follows:
      
      1) Correctly handle floating VEBs (VEBs with uplink_seid == 0)
         in i40e_reconstitute_veb(): look for the owner VSI and
         create it only for non-floating VEBs, and also set bridge
         mode only for such VEBs, as the floating ones always use
         VEB mode.
      2) Correctly handle floating VEBs in i40e_veb_release() and
         disallow their release while they still have VSIs. This is
         different from regular VEBs, which have an owner VSI that is
         connected to the VEB's uplink after VEB deletion by FW.
      3) Fix i40e_add_veb() to handle 'vsi' that is NULL for floating
         VEBs. For floating VEB use 0 for downlink SEID and 'true'
         for 'default_port' parameters as per datasheet.
      4) Fix 'add relay' command in i40e_dbg_command_write() to allow
         to create floating VEB by 'add relay 0 0' or 'add relay'
      
      Tested using debugfs:
      1) Initial state
      [root@host net-next]# echo dump switch > $CMD
      [root@host net-next]# dmesg -c
      [  173.701286] i40e 0000:02:00.0: header: 3 reported 3 total
      [  173.706701] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [  173.713241] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [  173.719507] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      
      2) Add floating VEB
      [root@host net-next]# CMD="/sys/kernel/debug/i40e/0000:02:00.0/command"
      [root@host net-next]# echo add relay > $CMD
      [root@host net-next]# dmesg -c
      [  245.551720] i40e 0000:02:00.0: added relay 162
      [root@host net-next]# echo dump switch > $CMD
      [root@host net-next]# dmesg -c
      [  276.984371] i40e 0000:02:00.0: header: 4 reported 4 total
      [  276.989779] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [  276.996302] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [  277.002569] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      [  277.009091] i40e 0000:02:00.0: type=17 seid=162 uplink=0 downlink=0
      
      3) Add VMDQ2 VSI to this new VEB
      [root@host net-next]# echo add vsi 162 > $CMD
      [root@host net-next]# dmesg -c
      [  332.314030] i40e 0000:02:00.0: added VSI 394 to relay 162
      [  332.337486] enp2s0f0np0v0: NIC Link is Up, 40 Gbps Full Duplex, Flow Control: None
      [root@host net-next]# echo dump switch > $CMD
      [root@host net-next]# dmesg -c
      [  387.284490] i40e 0000:02:00.0: header: 5 reported 5 total
      [  387.289904] i40e 0000:02:00.0: type=19 seid=394 uplink=162 downlink=16
      [  387.296446] i40e 0000:02:00.0: type=17 seid=162 uplink=0 downlink=0
      [  387.302708] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [  387.309234] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [  387.315500] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      
      4) Try to delete the VEB
      [root@host net-next]# echo del relay 162 > $CMD
      [root@host net-next]# dmesg -c
      [  428.749297] i40e 0000:02:00.0: deleting relay 162
      [  428.754011] i40e 0000:02:00.0: can't remove VEB 162 with 1 VSIs left
      
      5) Do PF reset and check switch status after rebuild
      [root@host net-next]# echo pfr > $CMD
      [root@host net-next]# echo dump switch > $CMD
      [root@host net-next]# dmesg -c
      [  738.056172] i40e 0000:02:00.0: header: 5 reported 5 total
      [  738.061577] i40e 0000:02:00.0: type=19 seid=394 uplink=162 downlink=16
      [  738.068104] i40e 0000:02:00.0: type=17 seid=162 uplink=0 downlink=0
      [  738.074367] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [  738.080892] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [  738.087160] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      
      6) Delete VSI and delete VEB
      [root@host net-next]# echo del vsi 394 > $CMD
      [root@host net-next]# echo del relay 162 > $CMD
      [root@host net-next]# echo dump switch > $CMD
      [root@host net-next]# dmesg -c
      [ 1233.081126] i40e 0000:02:00.0: deleting VSI 394
      [ 1239.345139] i40e 0000:02:00.0: deleting relay 162
      [ 1244.886920] i40e 0000:02:00.0: header: 3 reported 3 total
      [ 1244.892328] i40e 0000:02:00.0: type=19 seid=392 uplink=160 downlink=16
      [ 1244.898853] i40e 0000:02:00.0: type=17 seid=160 uplink=2 downlink=0
      [ 1244.905119] i40e 0000:02:00.0: type=19 seid=390 uplink=160 downlink=16
      Reviewed-by: default avatarWojciech Drewek <wojciech.drewek@intel.com>
      Signed-off-by: default avatarIvan Vecera <ivecera@redhat.com>
      Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
      Reviewed-by: default avatarSimon Horman <horms@kernel.org>
      Signed-off-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
      08cdde31
    • Ivan Vecera's avatar
      i40e: Add helpers to find VSI and VEB by SEID and use them · b7fac08d
      Ivan Vecera authored
      Add two helpers i40e_(veb|vsi)_get_by_seid() to find corresponding
      VEB or VSI by their SEID value and use these helpers to replace
      existing open-coded loops.
      Reviewed-by: default avatarWojciech Drewek <wojciech.drewek@intel.com>
      Signed-off-by: default avatarIvan Vecera <ivecera@redhat.com>
      Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
      Reviewed-by: default avatarSimon Horman <horms@kernel.org>
      Signed-off-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
      b7fac08d
    • Ivan Vecera's avatar
      i40e: Introduce and use macros for iterating VSIs and VEBs · b1f1b46f
      Ivan Vecera authored
      Introduce i40e_for_each_vsi() and i40e_for_each_veb() helper
      macros and use them to iterate relevant arrays.
      
      Replace pattern:
      for (i = 0; i < pf->num_alloc_vsi; i++)
      by:
      i40e_for_each_vsi(pf, i, vsi)
      
      and pattern:
      for (i = 0; i < I40E_MAX_VEB; i++)
      by
      i40e_for_each_veb(pf, i, veb)
      
      These macros also check if array item pf->vsi[i] or pf->veb[i]
      are not NULL and skip such items so we can remove redundant
      checks from loop bodies.
      Reviewed-by: default avatarWojciech Drewek <wojciech.drewek@intel.com>
      Signed-off-by: default avatarIvan Vecera <ivecera@redhat.com>
      Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
      Reviewed-by: default avatarSimon Horman <horms@kernel.org>
      Signed-off-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
      b1f1b46f
    • Ivan Vecera's avatar
      i40e: Use existing helper to find flow director VSI · 7e6cec7d
      Ivan Vecera authored
      Use existing i40e_find_vsi_by_type() to find a VSI
      associated with flow director.
      Reviewed-by: default avatarWojciech Drewek <wojciech.drewek@intel.com>
      Signed-off-by: default avatarIvan Vecera <ivecera@redhat.com>
      Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
      Reviewed-by: default avatarSimon Horman <horms@kernel.org>
      Signed-off-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
      7e6cec7d
    • Christian Marangi's avatar
      net: phy: aquantia: add AQR113 PHY ID · 71b605d3
      Christian Marangi authored
      Add Aquantia AQR113 PHY ID. Aquantia AQR113 is just a chip size variant of
      the already supported AQR133C where the only difference is the PHY ID
      and the hw chip size.
      Signed-off-by: default avatarChristian Marangi <ansuelsmth@gmail.com>
      Reviewed-by: default avatarAndrew Lunn <andrew@lunn.ch>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      71b605d3
    • Diogo Ivo's avatar
      net: ti: icssg-prueth: Remove duplicate cleanup calls in emac_ndo_stop() · 1d085e9c
      Diogo Ivo authored
      Remove the duplicate calls to prueth_emac_stop() and
      prueth_cleanup_tx_chns() in emac_ndo_stop().
      Signed-off-by: default avatarDiogo Ivo <diogo.ivo@siemens.com>
      Reviewed-by: default avatarRoger Quadros <rogerq@kernel.org>
      Reviewed-by: default avatarMD Danish Anwar <danishanwar@ti.com>
      Reviewed-by: default avatarDan Carpenter <dan.carpenter@linaro.org>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      1d085e9c
    • Geert Uytterhoeven's avatar
      tcp: Spelling s/curcuit/circuit/ · 21bd52ea
      Geert Uytterhoeven authored
      Fix a misspelling of "circuit".
      Signed-off-by: default avatarGeert Uytterhoeven <geert+renesas@glider.be>
      Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
      21bd52ea
    • David S. Miller's avatar
      Merge branch 'ionic-xdp-support' · d70a2a45
      David S. Miller authored
      Shannon Nelson says:
      
      ====================
      ionic: add XDP support
      
      This patchset is new support in ionic for XDP processing,
      including basic XDP on Rx packets, TX and REDIRECT, and frags
      for jumbo frames.
      
      Since ionic has not yet been converted to use the page_pool APIs,
      this uses the simple MEM_TYPE_PAGE_ORDER0 buffering.  There are plans
      to convert the driver in the near future.
      
      v4:
       - removed "inline" from short utility functions
       - changed to use "goto err_out" in ionic_xdp_register_rxq_info()
       - added "continue" to reduce nesting in ionic_xdp_queues_config()
       - used xdp_prog in ionic_rx_clean() to flag whether or not to sync
         the rx buffer after calling ionic_xdp_run()
       - swapped order of XDP_TX and XDP_REDIRECT cases in ionic_xdp_run()
         to make patch 6 a little cleaner
      
      v3:
      https://lore.kernel.org/netdev/20240210004827.53814-1-shannon.nelson@amd.com/
       - removed budget==0 patch, sent it separately to net
      
      v2:
      https://lore.kernel.org/netdev/20240208005725.65134-1-shannon.nelson@amd.com/
       - added calls to txq_trans_cond_update()
       - added a new patch to catch NAPI budget==0
      
      v1:
      https://lore.kernel.org/netdev/20240130013042.11586-1-shannon.nelson@amd.com/
      
      RFC:
      https://lore.kernel.org/netdev/20240118192500.58665-1-shannon.nelson@amd.com/
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      d70a2a45
    • Shannon Nelson's avatar
      ionic: implement xdp frags support · 5377805d
      Shannon Nelson authored
      Add support for using scatter-gather / frags in XDP in both
      Rx and Tx paths.
      Co-developed-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
      Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      5377805d
    • Shannon Nelson's avatar
      ionic: add ndo_xdp_xmit · 26f5726a
      Shannon Nelson authored
      When our ndo_xdp_xmit is called we mark the buffer with
      XDP_REDIRECT so we know to return it to the XDP stack for
      cleaning.
      Co-developed-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
      Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      26f5726a
    • Shannon Nelson's avatar
      ionic: Add XDP_REDIRECT support · 587fc3f0
      Shannon Nelson authored
      The XDP_REDIRECT packets are given to the XDP stack and
      we drop the use of the related page: it will get freed
      by the driver that ends up doing the Tx.  Because we have
      some hardware configurations with limited queue resources,
      we use the existing datapath Tx queues rather than creating
      and managing a separate set of xdp_tx queues.
      Co-developed-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
      Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      587fc3f0
    • Shannon Nelson's avatar
      ionic: Add XDP_TX support · 8eeed837
      Shannon Nelson authored
      The XDP_TX packets get fed back into the Rx queue's partnered
      Tx queue as an xdp_frame.
      Co-developed-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
      Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      8eeed837
    • Shannon Nelson's avatar
      ionic: Add XDP packet headroom · f81da39b
      Shannon Nelson authored
      If an xdp program is loaded, add headroom at the beginning
      of the frame to allow for editing and insertions that an XDP
      program might need room for, and tailroom used later for XDP
      frame tracking.  These are only needed in the first Rx buffer
      in a packet, not for any trailing frags.
      Co-developed-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
      Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      f81da39b
    • Shannon Nelson's avatar
      ionic: add initial framework for XDP support · 180e35cd
      Shannon Nelson authored
      Set up the basics for running Rx packets through XDP programs.
      Add new queue setup and teardown steps for adding/removing an
      XDP program, and add the call to run the XDP on a packet.
      
      The XDP frame size needs to be the MTU plus standard ethernet
      header, plus head room for XDP scribblings and tail room for a
      struct skb_shared_info.  Also, at this point, we don't support
      XDP frags, only a single contiguous Rx buffer.  This means
      that our page splitting is not very useful, so when XDP is in
      use we need to use the full Rx buffer size and not do sharing.
      Co-developed-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Brett Creeley <brett.creeley@amd.com>
      Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
      Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      180e35cd