Commit 7f953ab2, authored by Sowmini Varadhan, committed by David S. Miller

af_packet: TX_RING support for TPACKET_V3

Although TPACKET_V3 Rx has some benefits over TPACKET_V2 Rx, *_v3
does not currently have TX_RING support. As a result an application
that wants the best perf for Tx and Rx (e.g. to handle request/response
transactions) ends up needing 2 sockets, one with *_v2 for Tx and
another with *_v3 for Rx.

This patch enables TPACKET_V2 compatible Tx features in TPACKET_V3
so that an application can use a single descriptor to get the benefits
of _v3 RX_RING and _v2 TX_RING. An application may do a block-send by
first filling up multiple frames in the Tx ring and then triggering a
transmit. This patch only supports fixed-size Tx frames for TPACKET_V3,
and requires that tp_next_offset be zero.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e7072f66
...@@ -565,7 +565,7 @@ TPACKET_V1 --> TPACKET_V2: ...@@ -565,7 +565,7 @@ TPACKET_V1 --> TPACKET_V2:
(void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr)) (void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
TPACKET_V2 --> TPACKET_V3: TPACKET_V2 --> TPACKET_V3:
- Flexible buffer implementation: - Flexible buffer implementation for RX_RING:
1. Blocks can be configured with non-static frame-size 1. Blocks can be configured with non-static frame-size
2. Read/poll is at a block-level (as opposed to packet-level) 2. Read/poll is at a block-level (as opposed to packet-level)
3. Added poll timeout to avoid indefinite user-space wait 3. Added poll timeout to avoid indefinite user-space wait
...@@ -574,7 +574,12 @@ TPACKET_V2 --> TPACKET_V3: ...@@ -574,7 +574,12 @@ TPACKET_V2 --> TPACKET_V3:
4.1 block::timeout 4.1 block::timeout
4.2 tpkt_hdr::sk_rxhash 4.2 tpkt_hdr::sk_rxhash
- RX Hash data available in user space - RX Hash data available in user space
- Currently only RX_RING available - TX_RING semantics are conceptually similar to TPACKET_V2;
use tpacket3_hdr instead of tpacket2_hdr, and TPACKET3_HDRLEN
instead of TPACKET2_HDRLEN. In the current implementation,
the tp_next_offset field in the tpacket3_hdr MUST be set to
zero, indicating that the ring does not hold variable sized frames.
Packets with non-zero values of tp_next_offset will be dropped.
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
+ AF_PACKET fanout mode + AF_PACKET fanout mode
......
...@@ -409,6 +409,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) ...@@ -409,6 +409,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
flush_dcache_page(pgv_to_page(&h.h2->tp_status)); flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break; break;
case TPACKET_V3: case TPACKET_V3:
h.h3->tp_status = status;
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
break;
default: default:
WARN(1, "TPACKET version not supported.\n"); WARN(1, "TPACKET version not supported.\n");
BUG(); BUG();
...@@ -432,6 +435,8 @@ static int __packet_get_status(struct packet_sock *po, void *frame) ...@@ -432,6 +435,8 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
flush_dcache_page(pgv_to_page(&h.h2->tp_status)); flush_dcache_page(pgv_to_page(&h.h2->tp_status));
return h.h2->tp_status; return h.h2->tp_status;
case TPACKET_V3: case TPACKET_V3:
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
return h.h3->tp_status;
default: default:
WARN(1, "TPACKET version not supported.\n"); WARN(1, "TPACKET version not supported.\n");
BUG(); BUG();
...@@ -2497,6 +2502,13 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, ...@@ -2497,6 +2502,13 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
ph.raw = frame; ph.raw = frame;
switch (po->tp_version) { switch (po->tp_version) {
case TPACKET_V3:
if (ph.h3->tp_next_offset != 0) {
pr_warn_once("variable sized slot not supported");
return -EINVAL;
}
tp_len = ph.h3->tp_len;
break;
case TPACKET_V2: case TPACKET_V2:
tp_len = ph.h2->tp_len; tp_len = ph.h2->tp_len;
break; break;
...@@ -2516,6 +2528,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, ...@@ -2516,6 +2528,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
off_max = po->tx_ring.frame_size - tp_len; off_max = po->tx_ring.frame_size - tp_len;
if (po->sk.sk_type == SOCK_DGRAM) { if (po->sk.sk_type == SOCK_DGRAM) {
switch (po->tp_version) { switch (po->tp_version) {
case TPACKET_V3:
off = ph.h3->tp_net;
break;
case TPACKET_V2: case TPACKET_V2:
off = ph.h2->tp_net; off = ph.h2->tp_net;
break; break;
...@@ -2525,6 +2540,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, ...@@ -2525,6 +2540,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
} }
} else { } else {
switch (po->tp_version) { switch (po->tp_version) {
case TPACKET_V3:
off = ph.h3->tp_mac;
break;
case TPACKET_V2: case TPACKET_V2:
off = ph.h2->tp_mac; off = ph.h2->tp_mac;
break; break;
...@@ -4113,11 +4131,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, ...@@ -4113,11 +4131,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
struct tpacket_req *req = &req_u->req; struct tpacket_req *req = &req_u->req;
lock_sock(sk); lock_sock(sk);
/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
net_warn_ratelimited("Tx-ring is not supported.\n");
goto out;
}
rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb = tx_ring ? &po->tx_ring : &po->rx_ring;
rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
...@@ -4177,11 +4190,19 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, ...@@ -4177,11 +4190,19 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
goto out; goto out;
switch (po->tp_version) { switch (po->tp_version) {
case TPACKET_V3: case TPACKET_V3:
/* Transmit path is not supported. We checked /* Block transmit is not supported yet */
* it above but just being paranoid if (!tx_ring) {
*/
if (!tx_ring)
init_prb_bdqc(po, rb, pg_vec, req_u); init_prb_bdqc(po, rb, pg_vec, req_u);
} else {
struct tpacket_req3 *req3 = &req_u->req3;
if (req3->tp_retire_blk_tov ||
req3->tp_sizeof_priv ||
req3->tp_feature_req_word) {
err = -EINVAL;
goto out;
}
}
break; break;
default: default:
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment