Commit 463713eb authored by Jorgen Hansen's avatar Jorgen Hansen Committed by Greg Kroah-Hartman

VMCI: dma dg: add support for DMA datagrams receive

Use the DMA based receive operation instead of the ioread8_rep
based datagram receive when DMA datagrams are supported.

In the receive operation, configure the header to point to the
page aligned VMCI_MAX_DG_SIZE part of the receive buffer
using s/g configuration for the header. This ensures that the
existing dispatch routine can be used with little modification.
Initiate the receive by writing the lower 32 bit of the buffer
to the VMCI_DATA_IN_LOW_ADDR register, and wait for the busy
flag to be changed by the device using a wait queue.

The existing dispatch routine for received datagrams is reused
for the DMA datagrams with a few modifications:
- the receive buffer is always the maximum size for DMA datagrams
  (IO ports would try with a shorter buffer first to reduce
  overhead of the ioread8_rep operation).
- for DMA datagrams, datagrams are provided contiguous in the
  buffer as opposed to IO port datagrams, where they can start
  on any page boundary
Reviewed-by: Vishnu Dasa <vdasa@vmware.com>
Signed-off-by: Jorgen Hansen <jhansen@vmware.com>
Link: https://lore.kernel.org/r/20220207102725.2742-9-jhansen@vmware.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 22aa5c7f
...@@ -58,6 +58,7 @@ struct vmci_guest_device { ...@@ -58,6 +58,7 @@ struct vmci_guest_device {
struct tasklet_struct datagram_tasklet; struct tasklet_struct datagram_tasklet;
struct tasklet_struct bm_tasklet; struct tasklet_struct bm_tasklet;
struct wait_queue_head inout_wq;
void *data_buffer; void *data_buffer;
dma_addr_t data_buffer_base; dma_addr_t data_buffer_base;
...@@ -115,6 +116,36 @@ static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg) ...@@ -115,6 +116,36 @@ static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
iowrite32(val, dev->iobase + reg); iowrite32(val, dev->iobase + reg);
} }
/*
 * vmci_read_data - read @size bytes of incoming datagram data into @dest.
 *
 * For IO port based devices (no MMIO BAR mapped), data is pulled
 * byte-wise from the VMCI_DATA_IN_ADDR port. For MMIO capable devices,
 * a DMA receive is set up instead: the header at the start of
 * data_buffer is initialized, an S/G element directly after it is
 * pointed at the destination area inside data_buffer, the transfer is
 * kicked off by writing the buffer's bus address to
 * VMCI_DATA_IN_LOW_ADDR, and we then sleep on inout_wq until the
 * device flips the header's busy flag (the DMA-datagram interrupt
 * handler does the wake_up).
 *
 * @dest must point inside vmci_dev->data_buffer, since the S/G address
 * is computed as an offset from data_buffer_base.
 *
 * Fix vs. original: the if-branch now carries braces to match the
 * braced else-branch, per kernel coding style.
 */
static void vmci_read_data(struct vmci_guest_device *vmci_dev,
			   void *dest, size_t size)
{
	if (vmci_dev->mmio_base == NULL) {
		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
			    dest, size);
	} else {
		/*
		 * For DMA datagrams, the data_buffer will contain the header on the
		 * first page, followed by the incoming datagram(s) on the following
		 * pages. The header uses an S/G element immediately following the
		 * header on the first page to point to the data area.
		 */
		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
		/*
		 * void-pointer arithmetic (GNU C extension, standard in the
		 * kernel): byte offset of the destination within data_buffer.
		 */
		size_t buffer_offset = dest - vmci_dev->data_buffer;

		/*
		 * NOTE(review): opcode 1 presumably selects the "receive
		 * datagram" operation and size is the S/G element count —
		 * confirm against the VMCI device spec.
		 */
		buffer_header->opcode = 1;
		buffer_header->size = 1;
		/* Cleared here; the device sets it to 1 when the DMA is done. */
		buffer_header->busy = 0;
		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
		sg_array[0].size = size;

		/* Writing the low 32 bits of the buffer address starts the transfer. */
		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_LOW_ADDR);

		/* Woken by the DMA-datagram interrupt via inout_wq. */
		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
	}
}
static int vmci_write_data(struct vmci_guest_device *dev, static int vmci_write_data(struct vmci_guest_device *dev,
struct vmci_datagram *dg) struct vmci_datagram *dg)
{ {
...@@ -261,15 +292,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev) ...@@ -261,15 +292,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev)
} }
/* /*
* Reads datagrams from the data in port and dispatches them. We * Reads datagrams from the device and dispatches them. For IO port
* always start reading datagrams into only the first page of the * based access to the device, we always start reading datagrams into
* datagram buffer. If the datagrams don't fit into one page, we * only the first page of the datagram buffer. If the datagrams don't
* use the maximum datagram buffer size for the remainder of the * fit into one page, we use the maximum datagram buffer size for the
* invocation. This is a simple heuristic for not penalizing * remainder of the invocation. This is a simple heuristic for not
* small datagrams. * penalizing small datagrams. For DMA-based datagrams, we always
* use the maximum datagram buffer size, since there is no performance
* penalty for doing so.
* *
* This function assumes that it has exclusive access to the data * This function assumes that it has exclusive access to the data
* in port for the duration of the call. * in register(s) for the duration of the call.
*/ */
static void vmci_dispatch_dgs(unsigned long data) static void vmci_dispatch_dgs(unsigned long data)
{ {
...@@ -277,23 +310,41 @@ static void vmci_dispatch_dgs(unsigned long data) ...@@ -277,23 +310,41 @@ static void vmci_dispatch_dgs(unsigned long data)
u8 *dg_in_buffer = vmci_dev->data_buffer; u8 *dg_in_buffer = vmci_dev->data_buffer;
struct vmci_datagram *dg; struct vmci_datagram *dg;
size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE; size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
size_t current_dg_in_buffer_size = PAGE_SIZE; size_t current_dg_in_buffer_size;
size_t remaining_bytes; size_t remaining_bytes;
bool is_io_port = vmci_dev->mmio_base == NULL;
BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE); BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, if (!is_io_port) {
vmci_dev->data_buffer, current_dg_in_buffer_size); /* For mmio, the first page is used for the header. */
dg_in_buffer += PAGE_SIZE;
/*
* For DMA-based datagram operations, there is no performance
* penalty for reading the maximum buffer size.
*/
current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
} else {
current_dg_in_buffer_size = PAGE_SIZE;
}
vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
dg = (struct vmci_datagram *)dg_in_buffer; dg = (struct vmci_datagram *)dg_in_buffer;
remaining_bytes = current_dg_in_buffer_size; remaining_bytes = current_dg_in_buffer_size;
/*
* Read through the buffer until an invalid datagram header is
* encountered. The exit condition for datagrams read through
* VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
* can start on any page boundary in the buffer.
*/
while (dg->dst.resource != VMCI_INVALID_ID || while (dg->dst.resource != VMCI_INVALID_ID ||
remaining_bytes > PAGE_SIZE) { (is_io_port && remaining_bytes > PAGE_SIZE)) {
unsigned dg_in_size; unsigned dg_in_size;
/* /*
* When the input buffer spans multiple pages, a datagram can * If using VMCI_DATA_IN_ADDR, skip to the next page
* start on any page boundary in the buffer. * as a datagram can start on any page boundary.
*/ */
if (dg->dst.resource == VMCI_INVALID_ID) { if (dg->dst.resource == VMCI_INVALID_ID) {
dg = (struct vmci_datagram *)roundup( dg = (struct vmci_datagram *)roundup(
...@@ -343,11 +394,10 @@ static void vmci_dispatch_dgs(unsigned long data) ...@@ -343,11 +394,10 @@ static void vmci_dispatch_dgs(unsigned long data)
current_dg_in_buffer_size = current_dg_in_buffer_size =
dg_in_buffer_size; dg_in_buffer_size;
ioread8_rep(vmci_dev->iobase + vmci_read_data(vmci_dev,
VMCI_DATA_IN_ADDR, dg_in_buffer +
vmci_dev->data_buffer +
remaining_bytes, remaining_bytes,
current_dg_in_buffer_size - current_dg_in_buffer_size -
remaining_bytes); remaining_bytes);
} }
...@@ -385,10 +435,8 @@ static void vmci_dispatch_dgs(unsigned long data) ...@@ -385,10 +435,8 @@ static void vmci_dispatch_dgs(unsigned long data)
current_dg_in_buffer_size = dg_in_buffer_size; current_dg_in_buffer_size = dg_in_buffer_size;
for (;;) { for (;;) {
ioread8_rep(vmci_dev->iobase + vmci_read_data(vmci_dev, dg_in_buffer,
VMCI_DATA_IN_ADDR, current_dg_in_buffer_size);
vmci_dev->data_buffer,
current_dg_in_buffer_size);
if (bytes_to_skip <= current_dg_in_buffer_size) if (bytes_to_skip <= current_dg_in_buffer_size)
break; break;
...@@ -405,8 +453,7 @@ static void vmci_dispatch_dgs(unsigned long data) ...@@ -405,8 +453,7 @@ static void vmci_dispatch_dgs(unsigned long data)
if (remaining_bytes < VMCI_DG_HEADERSIZE) { if (remaining_bytes < VMCI_DG_HEADERSIZE) {
/* Get the next batch of datagrams. */ /* Get the next batch of datagrams. */
ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, vmci_read_data(vmci_dev, dg_in_buffer,
vmci_dev->data_buffer,
current_dg_in_buffer_size); current_dg_in_buffer_size);
dg = (struct vmci_datagram *)dg_in_buffer; dg = (struct vmci_datagram *)dg_in_buffer;
remaining_bytes = current_dg_in_buffer_size; remaining_bytes = current_dg_in_buffer_size;
...@@ -464,8 +511,11 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) ...@@ -464,8 +511,11 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
icr &= ~VMCI_ICR_NOTIFICATION; icr &= ~VMCI_ICR_NOTIFICATION;
} }
if (icr & VMCI_ICR_DMA_DATAGRAM)
if (icr & VMCI_ICR_DMA_DATAGRAM) {
wake_up_all(&dev->inout_wq);
icr &= ~VMCI_ICR_DMA_DATAGRAM; icr &= ~VMCI_ICR_DMA_DATAGRAM;
}
if (icr != 0) if (icr != 0)
dev_warn(dev->dev, dev_warn(dev->dev,
...@@ -498,6 +548,10 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) ...@@ -498,6 +548,10 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
*/ */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev) static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{ {
struct vmci_guest_device *dev = _dev;
wake_up_all(&dev->inout_wq);
return IRQ_HANDLED; return IRQ_HANDLED;
} }
...@@ -584,6 +638,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, ...@@ -584,6 +638,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_dispatch_dgs, (unsigned long)vmci_dev); vmci_dispatch_dgs, (unsigned long)vmci_dev);
tasklet_init(&vmci_dev->bm_tasklet, tasklet_init(&vmci_dev->bm_tasklet,
vmci_process_bitmap, (unsigned long)vmci_dev); vmci_process_bitmap, (unsigned long)vmci_dev);
init_waitqueue_head(&vmci_dev->inout_wq);
if (mmio_base != NULL) { if (mmio_base != NULL) {
vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment