Commit e2fdf784 authored by K. Y. Srinivasan's avatar K. Y. Srinivasan Committed by Greg Kroah-Hartman

Drivers: hv: vmbus: On write cleanup the logic to interrupt the host

commit 1f6ee4e7 upstream.

Signal the host when we determine the host is to be signaled.
The currrent code determines the need to signal in the ringbuffer
code and actually issues the signal elsewhere. This can result
in the host viewing this interrupt as spurious since the host may also
poll the channel. Make the necessary adjustments.
Signed-off-by: default avatarK. Y. Srinivasan <kys@microsoft.com>
Cc: Rolf Neugebauer <rolf.neugebauer@docker.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent afbb98f9
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
* vmbus_setevent- Trigger an event notification on the specified * vmbus_setevent- Trigger an event notification on the specified
* channel. * channel.
*/ */
static void vmbus_setevent(struct vmbus_channel *channel) void vmbus_setevent(struct vmbus_channel *channel)
{ {
struct hv_monitor_page *monitorpage; struct hv_monitor_page *monitorpage;
...@@ -65,6 +65,7 @@ static void vmbus_setevent(struct vmbus_channel *channel) ...@@ -65,6 +65,7 @@ static void vmbus_setevent(struct vmbus_channel *channel)
vmbus_set_event(channel); vmbus_set_event(channel);
} }
} }
EXPORT_SYMBOL_GPL(vmbus_setevent);
/* /*
* vmbus_open - Open the specified channel. * vmbus_open - Open the specified channel.
...@@ -635,8 +636,6 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, ...@@ -635,8 +636,6 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64)); u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64));
struct kvec bufferlist[3]; struct kvec bufferlist[3];
u64 aligned_data = 0; u64 aligned_data = 0;
int ret;
bool signal = false;
bool lock = channel->acquire_ring_lock; bool lock = channel->acquire_ring_lock;
int num_vecs = ((bufferlen != 0) ? 3 : 1); int num_vecs = ((bufferlen != 0) ? 3 : 1);
...@@ -656,41 +655,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, ...@@ -656,41 +655,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen); bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, return hv_ringbuffer_write(channel, bufferlist, num_vecs,
&signal, lock, channel->signal_policy); lock, kick_q);
/*
* Signalling the host is conditional on many factors:
* 1. The ring state changed from being empty to non-empty.
* This is tracked by the variable "signal".
* 2. The variable kick_q tracks if more data will be placed
* on the ring. We will not signal if more data is
* to be placed.
*
* Based on the channel signal state, we will decide
* which signaling policy will be applied.
*
* If we cannot write to the ring-buffer; signal the host
* even if we may not have written anything. This is a rare
* enough condition that it should not matter.
* NOTE: in this case, the hvsock channel is an exception, because
* it looks the host side's hvsock implementation has a throttling
* mechanism which can hurt the performance otherwise.
*
* KYS: Oct. 30, 2016:
* It looks like Windows hosts have logic to deal with DOS attacks that
* can be triggered if it receives interrupts when it is not expecting
* the interrupt. The host expects interrupts only when the ring
* transitions from empty to non-empty (or full to non full on the guest
* to host ring).
* So, base the signaling decision solely on the ring state until the
* host logic is fixed.
*/
if (((ret == 0) && signal))
vmbus_setevent(channel);
return ret;
} }
EXPORT_SYMBOL(vmbus_sendpacket_ctl); EXPORT_SYMBOL(vmbus_sendpacket_ctl);
...@@ -731,7 +698,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, ...@@ -731,7 +698,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
u32 flags, u32 flags,
bool kick_q) bool kick_q)
{ {
int ret;
int i; int i;
struct vmbus_channel_packet_page_buffer desc; struct vmbus_channel_packet_page_buffer desc;
u32 descsize; u32 descsize;
...@@ -739,7 +705,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, ...@@ -739,7 +705,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
u32 packetlen_aligned; u32 packetlen_aligned;
struct kvec bufferlist[3]; struct kvec bufferlist[3];
u64 aligned_data = 0; u64 aligned_data = 0;
bool signal = false;
bool lock = channel->acquire_ring_lock; bool lock = channel->acquire_ring_lock;
if (pagecount > MAX_PAGE_BUFFER_COUNT) if (pagecount > MAX_PAGE_BUFFER_COUNT)
...@@ -777,38 +742,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, ...@@ -777,38 +742,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen); bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, return hv_ringbuffer_write(channel, bufferlist, 3,
&signal, lock, channel->signal_policy); lock, kick_q);
/*
* Signalling the host is conditional on many factors:
* 1. The ring state changed from being empty to non-empty.
* This is tracked by the variable "signal".
* 2. The variable kick_q tracks if more data will be placed
* on the ring. We will not signal if more data is
* to be placed.
*
* Based on the channel signal state, we will decide
* which signaling policy will be applied.
*
* If we cannot write to the ring-buffer; signal the host
* even if we may not have written anything. This is a rare
* enough condition that it should not matter.
*
* KYS: Oct. 30, 2016:
* It looks like Windows hosts have logic to deal with DOS attacks that
* can be triggered if it receives interrupts when it is not expecting
* the interrupt. The host expects interrupts only when the ring
* transitions from empty to non-empty (or full to non full on the guest
* to host ring).
* So, base the signaling decision solely on the ring state until the
* host logic is fixed.
*/
if (((ret == 0) && signal))
vmbus_setevent(channel);
return ret;
} }
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl); EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);
...@@ -839,12 +774,10 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, ...@@ -839,12 +774,10 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
u32 desc_size, u32 desc_size,
void *buffer, u32 bufferlen, u64 requestid) void *buffer, u32 bufferlen, u64 requestid)
{ {
int ret;
u32 packetlen; u32 packetlen;
u32 packetlen_aligned; u32 packetlen_aligned;
struct kvec bufferlist[3]; struct kvec bufferlist[3];
u64 aligned_data = 0; u64 aligned_data = 0;
bool signal = false;
bool lock = channel->acquire_ring_lock; bool lock = channel->acquire_ring_lock;
packetlen = desc_size + bufferlen; packetlen = desc_size + bufferlen;
...@@ -865,13 +798,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, ...@@ -865,13 +798,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen); bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, return hv_ringbuffer_write(channel, bufferlist, 3,
&signal, lock, channel->signal_policy); lock, true);
if (ret == 0 && signal)
vmbus_setevent(channel);
return ret;
} }
EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc); EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
...@@ -883,14 +811,12 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, ...@@ -883,14 +811,12 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
struct hv_multipage_buffer *multi_pagebuffer, struct hv_multipage_buffer *multi_pagebuffer,
void *buffer, u32 bufferlen, u64 requestid) void *buffer, u32 bufferlen, u64 requestid)
{ {
int ret;
struct vmbus_channel_packet_multipage_buffer desc; struct vmbus_channel_packet_multipage_buffer desc;
u32 descsize; u32 descsize;
u32 packetlen; u32 packetlen;
u32 packetlen_aligned; u32 packetlen_aligned;
struct kvec bufferlist[3]; struct kvec bufferlist[3];
u64 aligned_data = 0; u64 aligned_data = 0;
bool signal = false;
bool lock = channel->acquire_ring_lock; bool lock = channel->acquire_ring_lock;
u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
multi_pagebuffer->len); multi_pagebuffer->len);
...@@ -930,13 +856,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, ...@@ -930,13 +856,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen); bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, return hv_ringbuffer_write(channel, bufferlist, 3,
&signal, lock, channel->signal_policy); lock, true);
if (ret == 0 && signal)
vmbus_setevent(channel);
return ret;
} }
EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer); EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer);
......
...@@ -527,10 +527,10 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, ...@@ -527,10 +527,10 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, int hv_ringbuffer_write(struct vmbus_channel *channel,
struct kvec *kv_list, struct kvec *kv_list,
u32 kv_count, bool *signal, bool lock, u32 kv_count, bool lock,
enum hv_signal_policy policy); bool kick_q);
int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
void *buffer, u32 buflen, u32 *buffer_actual_len, void *buffer, u32 buflen, u32 *buffer_actual_len,
......
...@@ -66,14 +66,25 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi) ...@@ -66,14 +66,25 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi)
* once the ring buffer is empty, it will clear the * once the ring buffer is empty, it will clear the
* interrupt_mask and re-check to see if new data has * interrupt_mask and re-check to see if new data has
* arrived. * arrived.
*
* KYS: Oct. 30, 2016:
* It looks like Windows hosts have logic to deal with DOS attacks that
* can be triggered if it receives interrupts when it is not expecting
* the interrupt. The host expects interrupts only when the ring
* transitions from empty to non-empty (or full to non full on the guest
* to host ring).
* So, base the signaling decision solely on the ring state until the
* host logic is fixed.
*/ */
static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi, static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel,
enum hv_signal_policy policy) bool kick_q)
{ {
struct hv_ring_buffer_info *rbi = &channel->outbound;
virt_mb(); virt_mb();
if (READ_ONCE(rbi->ring_buffer->interrupt_mask)) if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
return false; return;
/* check interrupt_mask before read_index */ /* check interrupt_mask before read_index */
virt_rmb(); virt_rmb();
...@@ -82,9 +93,9 @@ static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi, ...@@ -82,9 +93,9 @@ static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi,
* ring transitions from being empty to non-empty. * ring transitions from being empty to non-empty.
*/ */
if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) if (old_write == READ_ONCE(rbi->ring_buffer->read_index))
return true; vmbus_setevent(channel);
return false; return;
} }
/* Get the next write location for the specified ring buffer. */ /* Get the next write location for the specified ring buffer. */
...@@ -273,9 +284,9 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) ...@@ -273,9 +284,9 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
} }
/* Write to the ring buffer. */ /* Write to the ring buffer. */
int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, int hv_ringbuffer_write(struct vmbus_channel *channel,
struct kvec *kv_list, u32 kv_count, bool *signal, bool lock, struct kvec *kv_list, u32 kv_count, bool lock,
enum hv_signal_policy policy) bool kick_q)
{ {
int i = 0; int i = 0;
u32 bytes_avail_towrite; u32 bytes_avail_towrite;
...@@ -285,6 +296,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, ...@@ -285,6 +296,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
u32 old_write; u32 old_write;
u64 prev_indices = 0; u64 prev_indices = 0;
unsigned long flags = 0; unsigned long flags = 0;
struct hv_ring_buffer_info *outring_info = &channel->outbound;
for (i = 0; i < kv_count; i++) for (i = 0; i < kv_count; i++)
totalbytes_towrite += kv_list[i].iov_len; totalbytes_towrite += kv_list[i].iov_len;
...@@ -337,7 +349,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, ...@@ -337,7 +349,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
if (lock) if (lock)
spin_unlock_irqrestore(&outring_info->ring_lock, flags); spin_unlock_irqrestore(&outring_info->ring_lock, flags);
*signal = hv_need_to_signal(old_write, outring_info, policy); hv_signal_on_write(old_write, channel, kick_q);
return 0; return 0;
} }
......
...@@ -1447,6 +1447,7 @@ void hv_event_tasklet_enable(struct vmbus_channel *channel); ...@@ -1447,6 +1447,7 @@ void hv_event_tasklet_enable(struct vmbus_channel *channel);
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
void vmbus_setevent(struct vmbus_channel *channel);
/* /*
* Negotiated version with the Host. * Negotiated version with the Host.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment