Commit d90fd774 authored by Steven Rostedt (Red Hat), committed by Steven Rostedt

ring-buffer: Reorganize function locations

Functions in ring-buffer.c have gotten interleaved between different
use cases. Move the functions around to get like functions closer
together. This may or may not help gcc keep cache locality, but it
makes it a little easier to work with the code.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
parent 7d75e683
...@@ -1887,73 +1887,6 @@ rb_event_index(struct ring_buffer_event *event) ...@@ -1887,73 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
} }
/*
 * rb_event_is_commit - test whether @event sits at the commit position
 *
 * Returns non-zero only when the page-aligned address of @event matches
 * the data page of the current commit page and the event's index equals
 * the commit index of @cpu_buffer.
 */
static inline int
rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
		   struct ring_buffer_event *event)
{
	unsigned long event_idx = rb_event_index(event);
	unsigned long page_addr = (unsigned long)event & PAGE_MASK;

	/* Not on the commit page at all: cannot be the commit */
	if (cpu_buffer->commit_page->page != (void *)page_addr)
		return 0;

	return rb_commit_index(cpu_buffer) == event_idx;
}
/*
 * rb_set_commit_to_write - bring the commit position up to the write position
 *
 * Walks commit_page forward until it equals tail_page, storing each
 * page's write index as that page's commit value on the way. Then the
 * commit value of the final page is raised until it matches the page's
 * write index. If tail_page moved in the meantime, the whole procedure
 * is repeated.
 */
static void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long max_count;
/*
* We only race with interrupts and NMIs on this CPU.
* If we own the commit event, then we can commit
* all others that interrupted us, since the interruptions
* are in stack format (they finish before they come
* back to us). This allows us to do a simple loop to
* assign the commit to the tail.
*/
again:
/* Iteration budget for the page walk below; reset on every retry */
max_count = cpu_buffer->nr_pages * 100;
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
/* Give up once the budget is exhausted */
if (RB_WARN_ON(cpu_buffer, !(--max_count)))
return;
/* Give up if the tail page is reported to be the reader page */
if (RB_WARN_ON(cpu_buffer,
rb_is_reader_page(cpu_buffer->tail_page)))
return;
/* Publish everything written on this page, then step to the next one */
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
/* The write stamp restarts from the new commit page's time stamp */
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
/* add barrier to keep gcc from optimizing too much */
barrier();
}
/* Same page as the tail now: catch the commit value up to the write index */
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
/* The stored commit value must not carry bits outside RB_WRITE_MASK */
RB_WARN_ON(cpu_buffer,
local_read(&cpu_buffer->commit_page->page->commit) &
~RB_WRITE_MASK);
barrier();
}
/* again, keep gcc from optimizing */
barrier();
/*
* If an interrupt came in just after the first while loop
* and pushed the tail page forward, we will be left with
* a dangling commit that will never go forward.
*/
if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
goto again;
}
static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{ {
cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
...@@ -1979,63 +1912,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) ...@@ -1979,63 +1912,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->head = 0; iter->head = 0;
} }
/*
 * Slow path, kept out of line.
 *
 * Marks @event as a TIME_EXTEND event holding @delta and returns the
 * result of skip_time_extend() on it (presumably the slot that follows
 * the extend).
 */
static noinline struct ring_buffer_event *
rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{
	event->type_len = RINGBUF_TYPE_TIME_EXTEND;

	if (!rb_event_index(event)) {
		/* First event on the page: the extend is simply zeroed */
		event->time_delta = 0;
		event->array[0] = 0;
	} else {
		/* Low bits go in the header field, the rest in array[0] */
		event->time_delta = delta & TS_MASK;
		event->array[0] = delta >> TS_SHIFT;
	}

	return skip_time_extend(event);
}
/**
 * rb_update_event - fill in the header of a reserved event
 * @cpu_buffer: the per-CPU buffer the event belongs to (not read here)
 * @event: the event to update
 * @info: reservation info supplying length, delta and add_timestamp
 *
 * info->length is the actual size that is written to the ring buffer.
 * Once the optional time extend and the event header are subtracted
 * from it, the remainder decides how the data size is recorded: either
 * in array[0] with a type_len of zero, or directly in type_len counted
 * in RB_ALIGNMENT units.
 */
static void __always_inline
rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
		struct ring_buffer_event *event,
		struct rb_event_info *info)
{
	unsigned remaining = info->length;
	u64 ts_delta = info->delta;

	/*
	 * A requested timestamp is placed at the start of the reserved
	 * space; the data event behind it then carries a delta of zero.
	 */
	if (unlikely(info->add_timestamp)) {
		event = rb_add_time_stamp(event, ts_delta);
		remaining -= RB_LEN_TIME_EXTEND;
		ts_delta = 0;
	}

	event->time_delta = ts_delta;
	remaining -= RB_EVNT_HDR_SIZE;

	/* Small payloads encode their size in type_len itself */
	if (remaining <= RB_MAX_SMALL_DATA && !RB_FORCE_8BYTE_ALIGNMENT) {
		event->type_len = DIV_ROUND_UP(remaining, RB_ALIGNMENT);
		return;
	}

	/* Otherwise type_len is zero and array[0] holds the size */
	event->type_len = 0;
	event->array[0] = remaining;
}
/* /*
* rb_handle_head_page - writer hit the head page * rb_handle_head_page - writer hit the head page
* *
...@@ -2194,38 +2070,6 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -2194,38 +2070,6 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
return 0; return 0;
} }
/*
 * rb_calculate_event_length - size to reserve for @length bytes of data
 *
 * Adds the event header (and the array[0] length word when the data is
 * larger than RB_MAX_SMALL_DATA or 8 byte alignment is forced), then
 * rounds the result up to RB_ARCH_ALIGNMENT.
 */
static unsigned rb_calculate_event_length(unsigned length)
{
	struct ring_buffer_event event; /* Used only for sizeof array */
	/* zero length can cause confusions, so treat it as one byte */
	unsigned total = length ? length : 1;

	if (total > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
		total += sizeof(event.array[0]);

	total += RB_EVNT_HDR_SIZE;
	total = ALIGN(total, RB_ARCH_ALIGNMENT);

	/*
	 * When the time delta does not fit in the 27 bits of the header a
	 * timestamp has to be added. If another event arrives while this
	 * one is being discarded to grow it, the timestamp is written into
	 * the space already allocated for this event, and whatever is left
	 * over must become padding. The event length therefore has to be
	 * either exactly RB_LEN_TIME_EXTEND, or at least
	 * RB_LEN_TIME_EXTEND + 8, since 8 is the minimum size for padding.
	 * As the length is a multiple of 4, the only problematic value is
	 * 12 (RB_LEN_TIME_EXTEND + 4), which is bumped by one alignment unit.
	 */
	if (total == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
		total += RB_ALIGNMENT;

	return total;
}
static inline void static inline void
rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long tail, struct rb_event_info *info) unsigned long tail, struct rb_event_info *info)
...@@ -2424,71 +2268,471 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -2424,71 +2268,471 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
return NULL; return NULL;
} }
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK /* Slow path, do not inline */
static inline bool sched_clock_stable(void) static noinline struct ring_buffer_event *
rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{ {
return true; event->type_len = RINGBUF_TYPE_TIME_EXTEND;
}
#endif
static inline int /* Not the first event on the page? */
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, if (rb_event_index(event)) {
struct ring_buffer_event *event); event->time_delta = delta & TS_MASK;
static inline void rb_event_discard(struct ring_buffer_event *event); event->array[0] = delta >> TS_SHIFT;
static void } else {
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, /* nope, just zero it */
struct ring_buffer_event *event); event->time_delta = 0;
event->array[0] = 0;
}
static noinline void return skip_time_extend(event);
rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, }
struct ring_buffer_event *event,
struct rb_event_info *info)
{
struct ring_buffer_event *padding;
int length;
int size;
WARN_ONCE(info->delta > (1ULL << 59), /**
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", * rb_update_event - update event type and data
(unsigned long long)info->delta, * @event: the event to update
(unsigned long long)info->ts, * @type: the type of event
(unsigned long long)cpu_buffer->write_stamp, * @length: the size of the event field in the ring buffer
sched_clock_stable() ? "" : *
"If you just came from a suspend/resume,\n" * Update the type and data fields of the event. The length
"please switch to the trace global clock:\n" * is the actual size that is written to the ring buffer,
" echo global > /sys/kernel/debug/tracing/trace_clock\n"); * and with this, we can determine what to place into the
* data field.
*/
static void __always_inline
rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event,
struct rb_event_info *info)
{
unsigned length = info->length;
u64 delta = info->delta;
/* /*
* Discarding this event to add a timestamp in front, but * If we need to add a timestamp, then we
* we still need to update the length of it to perform the discard. * add it to the start of the reserved space.
*/ */
rb_update_event(cpu_buffer, event, info); if (unlikely(info->add_timestamp)) {
event = rb_add_time_stamp(event, delta);
if (rb_try_to_discard(cpu_buffer, event)) { length -= RB_LEN_TIME_EXTEND;
info->add_timestamp = 1; delta = 0;
/*
* The time delta since the last event is too big to
* hold in the time field of the event, then we append a
* TIME EXTEND event ahead of the data event.
*/
info->length += RB_LEN_TIME_EXTEND;
return;
} }
/* event->time_delta = delta;
* Humpf! An event came in after this one, and because it is not a length -= RB_EVNT_HDR_SIZE;
* commit, it will have a delta of zero, thus, it will take on if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
* the timestamp of the previous commit, which happened a long time event->type_len = 0;
* ago (we need to add a timestamp, remember?). event->array[0] = length;
* We need to add the timestamp here. A timestamp is a fixed size } else
* of 8 bytes. That means the rest of the event needs to be event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
* padding. }
*/
size = info->length - RB_LEN_TIME_EXTEND; static unsigned rb_calculate_event_length(unsigned length)
{
/* The padding will have a delta of 1 */ struct ring_buffer_event event; /* Used only for sizeof array */
if (size)
info->delta--; /* zero length can cause confusions */
if (!length)
length++;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
length = ALIGN(length, RB_ARCH_ALIGNMENT);
/*
* In case the time delta is larger than the 27 bits for it
* in the header, we need to add a timestamp. If another
* event comes in when trying to discard this one to increase
* the length, then the timestamp will be added in the allocated
* space of this event. If length is bigger than the size needed
* for the TIME_EXTEND, then padding has to be used. The events
* length must be either RB_LEN_TIME_EXTEND, or greater than or equal
* to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
* As length is a multiple of 4, we only need to worry if it
* is 12 (RB_LEN_TIME_EXTEND + 4).
*/
if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
length += RB_ALIGNMENT;
return length;
}
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Local fallback used when CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is not
 * defined: the clock is unconditionally reported as stable.
 */
static inline bool sched_clock_stable(void)
{
	return true;
}
#endif /* !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
/*
 * rb_try_to_discard - try to take back the space reserved for @event
 *
 * The discard is attempted only when @event lies on the tail page and
 * the page's write index equals the event's index plus
 * rb_event_ts_length(). In that case the write field is moved back to
 * the event's index with a cmpxchg and, on success, the event's length
 * is subtracted from entries_bytes.
 *
 * Returns 1 if the space was taken back, 0 if the event could not be
 * discarded.
 */
static inline int
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
unsigned long new_index, old_index;
struct buffer_page *bpage;
unsigned long index;
unsigned long addr;
/* new_index: where the write index should end up; old_index: where it must be now */
new_index = rb_event_index(event);
old_index = new_index + rb_event_ts_length(event);
/* Page-aligned address of the event, compared against the tail page's data page */
addr = (unsigned long)event;
addr &= PAGE_MASK;
bpage = cpu_buffer->tail_page;
if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
/* Bits of the write field outside RB_WRITE_MASK are carried into both cmpxchg operands */
unsigned long write_mask =
local_read(&bpage->write) & ~RB_WRITE_MASK;
/* Read the length before the cmpxchg gives the space away */
unsigned long event_length = rb_event_length(event);
/*
* This is on the tail page. It is possible that
* a write could come in and move the tail page
* and write to the next page. That is fine
* because we just shorten what is on this page.
*/
old_index += write_mask;
new_index += write_mask;
index = local_cmpxchg(&bpage->write, old_index, new_index);
/* The cmpxchg took effect only if it returned the value we expected */
if (index == old_index) {
/* update counters */
local_sub(event_length, &cpu_buffer->entries_bytes);
return 1;
}
}
/* could not discard */
return 0;
}
/*
 * rb_start_commit - note that a commit has begun on @cpu_buffer
 *
 * Bumps the committing counter first, then the commits counter.
 */
static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
	local_inc(&cpu_buffer->committing);
	local_inc(&cpu_buffer->commits);
}
/*
 * rb_set_commit_to_write - bring the commit position up to the write position
 *
 * Walks commit_page forward until it equals tail_page, storing each
 * page's write index as that page's commit value on the way. Then the
 * commit value of the final page is raised until it matches the page's
 * write index. If tail_page moved in the meantime, the whole procedure
 * is repeated.
 */
static void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long max_count;
/*
* We only race with interrupts and NMIs on this CPU.
* If we own the commit event, then we can commit
* all others that interrupted us, since the interruptions
* are in stack format (they finish before they come
* back to us). This allows us to do a simple loop to
* assign the commit to the tail.
*/
again:
/* Iteration budget for the page walk below; reset on every retry */
max_count = cpu_buffer->nr_pages * 100;
while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
/* Give up once the budget is exhausted */
if (RB_WARN_ON(cpu_buffer, !(--max_count)))
return;
/* Give up if the tail page is reported to be the reader page */
if (RB_WARN_ON(cpu_buffer,
rb_is_reader_page(cpu_buffer->tail_page)))
return;
/* Publish everything written on this page, then step to the next one */
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
/* The write stamp restarts from the new commit page's time stamp */
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
/* add barrier to keep gcc from optimizing too much */
barrier();
}
/* Same page as the tail now: catch the commit value up to the write index */
while (rb_commit_index(cpu_buffer) !=
rb_page_write(cpu_buffer->commit_page)) {
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
/* The stored commit value must not carry bits outside RB_WRITE_MASK */
RB_WARN_ON(cpu_buffer,
local_read(&cpu_buffer->commit_page->page->commit) &
~RB_WRITE_MASK);
barrier();
}
/* again, keep gcc from optimizing */
barrier();
/*
* If an interrupt came in just after the first while loop
* and pushed the tail page forward, we will be left with
* a dangling commit that will never go forward.
*/
if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
goto again;
}
/*
 * rb_end_commit - finish a commit on @cpu_buffer
 *
 * If this is the only commit in progress (committing == 1), the commit
 * position is moved up to the write position. The committing counter is
 * then decremented. If the commits counter changed while that happened
 * and nobody is left committing, the counter is re-taken and the whole
 * sequence is run again.
 */
static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
/* Ending a commit with a zero committing count is a bug: warn and bail */
if (RB_WARN_ON(cpu_buffer,
!local_read(&cpu_buffer->committing)))
return;
again:
/* Snapshot the commits counter so later changes can be detected below */
commits = local_read(&cpu_buffer->commits);
/* synchronize with interrupts */
barrier();
/* Only the sole remaining committer pushes the commit position forward */
if (local_read(&cpu_buffer->committing) == 1)
rb_set_commit_to_write(cpu_buffer);
local_dec(&cpu_buffer->committing);
/* synchronize with interrupts */
barrier();
/*
* Need to account for interrupts coming in between the
* updating of the commit page and the clearing of the
* committing counter.
*/
if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
!local_read(&cpu_buffer->committing)) {
/* Become the committer again and redo the update */
local_inc(&cpu_buffer->committing);
goto again;
}
}
/*
 * rb_event_discard - turn @event into a padding event
 *
 * A leading TIME_EXTEND is stepped over first, so the event changed is
 * the one returned by skip_time_extend().
 */
static inline void rb_event_discard(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);

	/*
	 * Record the real length in array[0] while type_len still
	 * describes the original event, then relabel it as padding.
	 */
	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
	event->type_len = RINGBUF_TYPE_PADDING;

	/* the time delta must not be left at zero */
	if (event->time_delta == 0)
		event->time_delta = 1;
}
/*
 * rb_event_is_commit - test whether @event sits at the commit position
 *
 * Returns non-zero only when the page-aligned address of @event matches
 * the data page of the current commit page and the event's index equals
 * the commit index of @cpu_buffer.
 */
static inline int
rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
		   struct ring_buffer_event *event)
{
	unsigned long event_idx = rb_event_index(event);
	unsigned long page_addr = (unsigned long)event & PAGE_MASK;

	/* Not on the commit page at all: cannot be the commit */
	if (cpu_buffer->commit_page->page != (void *)page_addr)
		return 0;

	return rb_commit_index(cpu_buffer) == event_idx;
}
/*
 * rb_update_write_stamp - advance write_stamp for a committed event
 *
 * Nothing happens unless @event is the commit (the event first in the
 * commit queue). If it is, write_stamp is either reloaded from the
 * commit page's time stamp or moved forward by the event's delta.
 */
static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		      struct ring_buffer_event *event)
{
	u64 extended;

	/* Only the event first in the commit queue updates the stamp */
	if (!rb_event_is_commit(cpu_buffer, event))
		return;

	/* First on a page: take the page's own time stamp */
	if (!rb_event_index(event)) {
		cpu_buffer->write_stamp =
			cpu_buffer->commit_page->page->time_stamp;
		return;
	}

	/* Ordinary event: the header delta is all there is */
	if (event->type_len != RINGBUF_TYPE_TIME_EXTEND) {
		cpu_buffer->write_stamp += event->time_delta;
		return;
	}

	/* TIME_EXTEND: rebuild the delta from array[0] and the header bits */
	extended = event->array[0];
	extended <<= TS_SHIFT;
	extended += event->time_delta;
	cpu_buffer->write_stamp += extended;
}
/*
 * rb_commit - commit @event on @cpu_buffer
 *
 * Counts the entry, lets the event update the write stamp, and then
 * ends the commit.
 */
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
		      struct ring_buffer_event *event)
{
	local_inc(&cpu_buffer->entries);
	rb_update_write_stamp(cpu_buffer, event);
	rb_end_commit(cpu_buffer);
}
/*
 * rb_wakeups - queue irq work for anyone flagged as waiting
 *
 * Handles, in order, the buffer-wide waiters, the per-CPU waiters and
 * the per-CPU "full" waiters. Each pending flag is cleared before the
 * matching irq work is queued.
 */
static __always_inline void
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
	if (buffer->irq_work.waiters_pending) {
		buffer->irq_work.waiters_pending = false;
		/* irq_work_queue() supplies its own memory barriers */
		irq_work_queue(&buffer->irq_work.work);
	}

	if (cpu_buffer->irq_work.waiters_pending) {
		cpu_buffer->irq_work.waiters_pending = false;
		/* irq_work_queue() supplies its own memory barriers */
		irq_work_queue(&cpu_buffer->irq_work.work);
	}

	/* Full waiters are left alone while the reader page is the commit page */
	if (cpu_buffer->reader_page == cpu_buffer->commit_page)
		return;

	if (!cpu_buffer->irq_work.full_waiters_pending)
		return;

	cpu_buffer->irq_work.wakeup_full = true;
	cpu_buffer->irq_work.full_waiters_pending = false;
	/* irq_work_queue() supplies its own memory barriers */
	irq_work_queue(&cpu_buffer->irq_work.work);
}
/*
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
* by the current task between lock and unlock. But it can
* be modified more than once via an interrupt. To pass this
* information from the lock to the unlock without having to
* access the 'in_interrupt()' functions again (which do show
* a bit of overhead in something as critical as function tracing),
* we use a bitmask trick.
*
* bit 0 = NMI context
* bit 1 = IRQ context
* bit 2 = SoftIRQ context
* bit 3 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
* context.
*
* When the context is determined, the corresponding bit is
* checked and set (if it was set, then a recursion of that context
* happened).
*
* On unlock, we need to clear this bit. To do so, just subtract
* 1 from the current_context and AND it to itself.
*
* (binary)
* 101 - 1 = 100
* 101 & 100 = 100 (clearing bit zero)
*
* 1010 - 1 = 1001
* 1010 & 1001 = 1000 (clearing bit 1)
*
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
*/
/*
 * trace_recursive_lock - claim the bit for the current context
 *
 * Picks the RB_CTX_* bit matching the running context and sets it in
 * cpu_buffer->current_context.
 *
 * Returns 1 if that bit was already set (recursion within the same
 * context), 0 once the bit has been claimed.
 */
static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
	unsigned int contexts = cpu_buffer->current_context;
	unsigned int flag;
	int bit;

	if (!in_interrupt())
		bit = RB_CTX_NORMAL;
	else if (in_nmi())
		bit = RB_CTX_NMI;
	else if (in_irq())
		bit = RB_CTX_IRQ;
	else
		bit = RB_CTX_SOFTIRQ;

	flag = 1 << bit;

	/* Bit already taken: this context is recursing */
	if (unlikely(contexts & flag))
		return 1;

	cpu_buffer->current_context = contexts | flag;
	return 0;
}
/*
 * trace_recursive_unlock - release the bit of the current context
 *
 * ANDing the mask with itself minus one clears its lowest set bit.
 */
static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
	cpu_buffer->current_context =
		cpu_buffer->current_context & (cpu_buffer->current_context - 1);
}
/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 *
 * Commits @event to the per-CPU buffer of the CPU this runs on, queues
 * any pending wakeups, drops the recursion context via
 * trace_recursive_unlock() and re-enables preemption.
 *
 * Must be paired with ring_buffer_lock_reserve.
 *
 * Always returns 0.
 */
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
			      struct ring_buffer_event *event)
{
	struct ring_buffer_per_cpu *cpu_buffer =
		buffer->buffers[raw_smp_processor_id()];

	rb_commit(cpu_buffer, event);
	rb_wakeups(buffer, cpu_buffer);

	trace_recursive_unlock(cpu_buffer);
	preempt_enable_notrace();

	return 0;
}
EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
static noinline void
rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event,
struct rb_event_info *info)
{
struct ring_buffer_event *padding;
int length;
int size;
WARN_ONCE(info->delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
(unsigned long long)info->delta,
(unsigned long long)info->ts,
(unsigned long long)cpu_buffer->write_stamp,
sched_clock_stable() ? "" :
"If you just came from a suspend/resume,\n"
"please switch to the trace global clock:\n"
" echo global > /sys/kernel/debug/tracing/trace_clock\n");
/*
* Discarding this event to add a timestamp in front, but
* we still need to update the length of it to perform the discard.
*/
rb_update_event(cpu_buffer, event, info);
if (rb_try_to_discard(cpu_buffer, event)) {
info->add_timestamp = 1;
/*
* The time delta since the last event is too big to
* hold in the time field of the event, then we append a
* TIME EXTEND event ahead of the data event.
*/
info->length += RB_LEN_TIME_EXTEND;
return;
}
/*
* Humpf! An event came in after this one, and because it is not a
* commit, it will have a delta of zero, thus, it will take on
* the timestamp of the previous commit, which happened a long time
* ago (we need to add a timestamp, remember?).
* We need to add the timestamp here. A timestamp is a fixed size
* of 8 bytes. That means the rest of the event needs to be
* padding.
*/
size = info->length - RB_LEN_TIME_EXTEND;
/* The padding will have a delta of 1 */
if (size)
info->delta--;
padding = rb_add_time_stamp(event, info->delta); padding = rb_add_time_stamp(event, info->delta);
...@@ -2573,84 +2817,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, ...@@ -2573,84 +2817,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
return event; return event;
} }
/*
 * rb_try_to_discard - try to take back the space reserved for @event
 *
 * The discard is attempted only when @event lies on the tail page and
 * the page's write index equals the event's index plus
 * rb_event_ts_length(). In that case the write field is moved back to
 * the event's index with a cmpxchg and, on success, the event's length
 * is subtracted from entries_bytes.
 *
 * Returns 1 if the space was taken back, 0 if the event could not be
 * discarded.
 */
static inline int
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
unsigned long new_index, old_index;
struct buffer_page *bpage;
unsigned long index;
unsigned long addr;
/* new_index: where the write index should end up; old_index: where it must be now */
new_index = rb_event_index(event);
old_index = new_index + rb_event_ts_length(event);
/* Page-aligned address of the event, compared against the tail page's data page */
addr = (unsigned long)event;
addr &= PAGE_MASK;
bpage = cpu_buffer->tail_page;
if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
/* Bits of the write field outside RB_WRITE_MASK are carried into both cmpxchg operands */
unsigned long write_mask =
local_read(&bpage->write) & ~RB_WRITE_MASK;
/* Read the length before the cmpxchg gives the space away */
unsigned long event_length = rb_event_length(event);
/*
* This is on the tail page. It is possible that
* a write could come in and move the tail page
* and write to the next page. That is fine
* because we just shorten what is on this page.
*/
old_index += write_mask;
new_index += write_mask;
index = local_cmpxchg(&bpage->write, old_index, new_index);
/* The cmpxchg took effect only if it returned the value we expected */
if (index == old_index) {
/* update counters */
local_sub(event_length, &cpu_buffer->entries_bytes);
return 1;
}
}
/* could not discard */
return 0;
}
/*
 * rb_start_commit - note that a commit has begun on @cpu_buffer
 *
 * Bumps the committing counter first, then the commits counter.
 */
static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
	local_inc(&cpu_buffer->committing);
	local_inc(&cpu_buffer->commits);
}
/*
 * rb_end_commit - finish a commit on @cpu_buffer
 *
 * If this is the only commit in progress (committing == 1), the commit
 * position is moved up to the write position. The committing counter is
 * then decremented. If the commits counter changed while that happened
 * and nobody is left committing, the counter is re-taken and the whole
 * sequence is run again.
 */
static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
/* Ending a commit with a zero committing count is a bug: warn and bail */
if (RB_WARN_ON(cpu_buffer,
!local_read(&cpu_buffer->committing)))
return;
again:
/* Snapshot the commits counter so later changes can be detected below */
commits = local_read(&cpu_buffer->commits);
/* synchronize with interrupts */
barrier();
/* Only the sole remaining committer pushes the commit position forward */
if (local_read(&cpu_buffer->committing) == 1)
rb_set_commit_to_write(cpu_buffer);
local_dec(&cpu_buffer->committing);
/* synchronize with interrupts */
barrier();
/*
* Need to account for interrupts coming in between the
* updating of the commit page and the clearing of the
* committing counter.
*/
if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
!local_read(&cpu_buffer->committing)) {
/* Become the committer again and redo the update */
local_inc(&cpu_buffer->committing);
goto again;
}
}
static struct ring_buffer_event * static struct ring_buffer_event *
rb_reserve_next_event(struct ring_buffer *buffer, rb_reserve_next_event(struct ring_buffer *buffer,
struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_per_cpu *cpu_buffer,
...@@ -2706,75 +2872,6 @@ rb_reserve_next_event(struct ring_buffer *buffer, ...@@ -2706,75 +2872,6 @@ rb_reserve_next_event(struct ring_buffer *buffer,
return NULL; return NULL;
} }
/*
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
* by the current task between lock and unlock. But it can
* be modified more than once via an interrupt. To pass this
* information from the lock to the unlock without having to
* access the 'in_interrupt()' functions again (which do show
* a bit of overhead in something as critical as function tracing),
* we use a bitmask trick.
*
* bit 0 = NMI context
* bit 1 = IRQ context
* bit 2 = SoftIRQ context
* bit 3 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
* context.
*
* When the context is determined, the corresponding bit is
* checked and set (if it was set, then a recursion of that context
* happened).
*
* On unlock, we need to clear this bit. To do so, just subtract
* 1 from the current_context and AND it to itself.
*
* (binary)
* 101 - 1 = 100
* 101 & 100 = 100 (clearing bit zero)
*
* 1010 - 1 = 1001
* 1010 & 1001 = 1000 (clearing bit 1)
*
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
*/
/*
 * trace_recursive_lock - claim the bit for the current context
 *
 * Picks the RB_CTX_* bit matching the running context and sets it in
 * cpu_buffer->current_context.
 *
 * Returns 1 if that bit was already set (recursion within the same
 * context), 0 once the bit has been claimed.
 */
static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
	unsigned int contexts = cpu_buffer->current_context;
	unsigned int flag;
	int bit;

	if (!in_interrupt())
		bit = RB_CTX_NORMAL;
	else if (in_nmi())
		bit = RB_CTX_NMI;
	else if (in_irq())
		bit = RB_CTX_IRQ;
	else
		bit = RB_CTX_SOFTIRQ;

	flag = 1 << bit;

	/* Bit already taken: this context is recursing */
	if (unlikely(contexts & flag))
		return 1;

	cpu_buffer->current_context = contexts | flag;
	return 0;
}
/*
 * trace_recursive_unlock - release the bit of the current context
 *
 * ANDing the mask with itself minus one clears its lowest set bit.
 */
static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
	cpu_buffer->current_context =
		cpu_buffer->current_context & (cpu_buffer->current_context - 1);
}
/** /**
* ring_buffer_lock_reserve - reserve a part of the buffer * ring_buffer_lock_reserve - reserve a part of the buffer
* @buffer: the ring buffer to reserve from * @buffer: the ring buffer to reserve from
...@@ -2833,111 +2930,6 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) ...@@ -2833,111 +2930,6 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
} }
EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
/*
 * rb_update_write_stamp - advance write_stamp for a committed event
 *
 * Nothing happens unless @event is the commit (the event first in the
 * commit queue). If it is, write_stamp is either reloaded from the
 * commit page's time stamp or moved forward by the event's delta.
 */
static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
		      struct ring_buffer_event *event)
{
	u64 extended;

	/* Only the event first in the commit queue updates the stamp */
	if (!rb_event_is_commit(cpu_buffer, event))
		return;

	/* First on a page: take the page's own time stamp */
	if (!rb_event_index(event)) {
		cpu_buffer->write_stamp =
			cpu_buffer->commit_page->page->time_stamp;
		return;
	}

	/* Ordinary event: the header delta is all there is */
	if (event->type_len != RINGBUF_TYPE_TIME_EXTEND) {
		cpu_buffer->write_stamp += event->time_delta;
		return;
	}

	/* TIME_EXTEND: rebuild the delta from array[0] and the header bits */
	extended = event->array[0];
	extended <<= TS_SHIFT;
	extended += event->time_delta;
	cpu_buffer->write_stamp += extended;
}
/*
 * rb_commit - commit @event on @cpu_buffer
 *
 * Counts the entry, lets the event update the write stamp, and then
 * ends the commit.
 */
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
		      struct ring_buffer_event *event)
{
	local_inc(&cpu_buffer->entries);
	rb_update_write_stamp(cpu_buffer, event);
	rb_end_commit(cpu_buffer);
}
/*
 * rb_wakeups - queue irq work for anyone flagged as waiting
 *
 * Handles, in order, the buffer-wide waiters, the per-CPU waiters and
 * the per-CPU "full" waiters. Each pending flag is cleared before the
 * matching irq work is queued.
 */
static __always_inline void
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
	if (buffer->irq_work.waiters_pending) {
		buffer->irq_work.waiters_pending = false;
		/* irq_work_queue() supplies its own memory barriers */
		irq_work_queue(&buffer->irq_work.work);
	}

	if (cpu_buffer->irq_work.waiters_pending) {
		cpu_buffer->irq_work.waiters_pending = false;
		/* irq_work_queue() supplies its own memory barriers */
		irq_work_queue(&cpu_buffer->irq_work.work);
	}

	/* Full waiters are left alone while the reader page is the commit page */
	if (cpu_buffer->reader_page == cpu_buffer->commit_page)
		return;

	if (!cpu_buffer->irq_work.full_waiters_pending)
		return;

	cpu_buffer->irq_work.wakeup_full = true;
	cpu_buffer->irq_work.full_waiters_pending = false;
	/* irq_work_queue() supplies its own memory barriers */
	irq_work_queue(&cpu_buffer->irq_work.work);
}
/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 *
 * Commits @event to the per-CPU buffer of the CPU this runs on, queues
 * any pending wakeups, drops the recursion context via
 * trace_recursive_unlock() and re-enables preemption.
 *
 * Must be paired with ring_buffer_lock_reserve.
 *
 * Always returns 0.
 */
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
			      struct ring_buffer_event *event)
{
	struct ring_buffer_per_cpu *cpu_buffer =
		buffer->buffers[raw_smp_processor_id()];

	rb_commit(cpu_buffer, event);
	rb_wakeups(buffer, cpu_buffer);

	trace_recursive_unlock(cpu_buffer);
	preempt_enable_notrace();

	return 0;
}
EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
/*
 * rb_event_discard - turn @event into a padding event
 *
 * A leading TIME_EXTEND is stepped over first, so the event changed is
 * the one returned by skip_time_extend().
 */
static inline void rb_event_discard(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);

	/*
	 * Record the real length in array[0] while type_len still
	 * describes the original event, then relabel it as padding.
	 */
	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
	event->type_len = RINGBUF_TYPE_PADDING;

	/* the time delta must not be left at zero */
	if (event->time_delta == 0)
		event->time_delta = 1;
}
/* /*
* Decrement the entries to the page that an event is on. * Decrement the entries to the page that an event is on.
* The event does not even need to exist, only the pointer * The event does not even need to exist, only the pointer
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment