/*
 * event tracer
 *
 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
 *
 *  - Added format output of fields of the trace point.
 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
 *
 */

#define pr_fmt(fmt) fmt

#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include <asm/setup.h>

#include "trace_output.h"

#undef TRACE_SYSTEM
#define TRACE_SYSTEM "TRACE_SYSTEM"

DEFINE_MUTEX(event_mutex);

LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_common_fields);

#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)

static struct kmem_cache *field_cachep;
static struct kmem_cache *file_cachep;

#define SYSTEM_FL_FREE_NAME		(1 << 31)

static inline int system_refcount(struct event_subsystem *system)
{
	return system->ref_count & ~SYSTEM_FL_FREE_NAME;
}

static int system_refcount_inc(struct event_subsystem *system)
{
	return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME;
}

static int system_refcount_dec(struct event_subsystem *system)
{
	return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME;
}
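
/*
 * Illustrative example: a dynamically named subsystem holding two
 * references has ref_count == (SYSTEM_FL_FREE_NAME | 2); the helpers
 * above mask that flag bit off, so system_refcount() still reports 2.
 */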

/* Double loops, do not use break, only goto's work */
#define do_for_each_event_file(tr, file)			\
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
		list_for_each_entry(file, &tr->events, list)

#define do_for_each_event_file_safe(tr, file)			\
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
		struct ftrace_event_file *___n;				\
		list_for_each_entry_safe(file, ___n, &tr->events, list)

#define while_for_each_event_file()		\
	}
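
/*
 * Usage sketch for the helpers above:
 *
 *	do_for_each_event_file(tr, file) {
 *		...
 *	} while_for_each_event_file();
 */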

static struct list_head *
trace_get_fields(struct ftrace_event_call *event_call)
{
	if (!event_call->class->get_fields)
		return &event_call->class->fields;
	return event_call->class->get_fields(event_call);
}

static struct ftrace_event_field *
__find_event_field(struct list_head *head, char *name)
{
	struct ftrace_event_field *field;

	list_for_each_entry(field, head, link) {
		if (!strcmp(field->name, name))
			return field;
	}

	return NULL;
}

struct ftrace_event_field *
trace_find_event_field(struct ftrace_event_call *call, char *name)
{
	struct ftrace_event_field *field;
	struct list_head *head;

	field = __find_event_field(&ftrace_common_fields, name);
	if (field)
		return field;

	head = trace_get_fields(call);
	return __find_event_field(head, name);
}

static int __trace_define_field(struct list_head *head, const char *type,
				const char *name, int offset, int size,
				int is_signed, int filter_type)
{
	struct ftrace_event_field *field;

	field = kmem_cache_alloc(field_cachep, GFP_TRACE);
	if (!field)
		return -ENOMEM;

	field->name = name;
	field->type = type;

	if (filter_type == FILTER_OTHER)
		field->filter_type = filter_assign_type(type);
	else
		field->filter_type = filter_type;

	field->offset = offset;
	field->size = size;
	field->is_signed = is_signed;

	list_add(&field->link, head);

	return 0;
}

int trace_define_field(struct ftrace_event_call *call, const char *type,
		       const char *name, int offset, int size, int is_signed,
		       int filter_type)
{
	struct list_head *head;

	if (WARN_ON(!call->class))
		return 0;

	head = trace_get_fields(call);
	return __trace_define_field(head, type, name, offset, size,
				    is_signed, filter_type);
}
EXPORT_SYMBOL_GPL(trace_define_field);

#define __common_field(type, item)					\
	ret = __trace_define_field(&ftrace_common_fields, #type,	\
				   "common_" #item,			\
				   offsetof(typeof(ent), item),		\
				   sizeof(ent.item),			\
				   is_signed_type(type), FILTER_OTHER);	\
	if (ret)							\
		return ret;

static int trace_define_common_fields(void)
{
	int ret;
	struct trace_entry ent;

	__common_field(unsigned short, type);
	__common_field(unsigned char, flags);
	__common_field(unsigned char, preempt_count);
	__common_field(int, pid);

	return ret;
}

static void trace_destroy_fields(struct ftrace_event_call *call)
{
	struct ftrace_event_field *field, *next;
	struct list_head *head;

	head = trace_get_fields(call);
	list_for_each_entry_safe(field, next, head, link) {
		list_del(&field->link);
		kmem_cache_free(field_cachep, field);
	}
}

int trace_event_raw_init(struct ftrace_event_call *call)
{
	int id;

	id = register_ftrace_event(&call->event);
	if (!id)
		return -ENODEV;

	return 0;
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);

void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
				  struct ftrace_event_file *ftrace_file,
				  unsigned long len)
{
	struct ftrace_event_call *event_call = ftrace_file->event_call;

	local_save_flags(fbuffer->flags);
	fbuffer->pc = preempt_count();
	fbuffer->ftrace_file = ftrace_file;

	fbuffer->event =
		trace_event_buffer_lock_reserve(&fbuffer->buffer, ftrace_file,
						event_call->event.type, len,
						fbuffer->flags, fbuffer->pc);
	if (!fbuffer->event)
		return NULL;

	fbuffer->entry = ring_buffer_event_data(fbuffer->event);
	return fbuffer->entry;
}
EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve);

static DEFINE_SPINLOCK(tracepoint_iter_lock);

static void output_printk(struct ftrace_event_buffer *fbuffer)
{
	struct ftrace_event_call *event_call;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	if (!iter)
		return;

	event_call = fbuffer->ftrace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	event = &fbuffer->ftrace_file->event_call->event;

	spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}

void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer)
{
	if (tracepoint_printk)
		output_printk(fbuffer);

	event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer,
				    fbuffer->event, fbuffer->entry,
				    fbuffer->flags, fbuffer->pc);
}
EXPORT_SYMBOL_GPL(ftrace_event_buffer_commit);
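
/*
 * Sketch of how the reserve/commit pair above is used by a generated
 * trace event probe (the entry type and field are illustrative):
 *
 *	struct ftrace_event_buffer fbuffer;
 *	struct my_entry *entry;
 *
 *	entry = ftrace_event_buffer_reserve(&fbuffer, ftrace_file,
 *					    sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->field = value;
 *	ftrace_event_buffer_commit(&fbuffer);
 */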

int ftrace_event_reg(struct ftrace_event_call *call,
		     enum trace_reg type, void *data)
{
	struct ftrace_event_file *file = data;

	WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
	switch (type) {
	case TRACE_REG_REGISTER:
		return tracepoint_probe_register(call->tp,
						 call->class->probe,
						 file);
	case TRACE_REG_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->probe,
					    file);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return tracepoint_probe_register(call->tp,
						 call->class->perf_probe,
						 call);
	case TRACE_REG_PERF_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->perf_probe,
					    call);
		return 0;
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ftrace_event_reg);

void trace_event_enable_cmd_record(bool enable)
{
	struct ftrace_event_file *file;
	struct trace_array *tr;

	mutex_lock(&event_mutex);
	do_for_each_event_file(tr, file) {

		if (!(file->flags & FTRACE_EVENT_FL_ENABLED))
			continue;

		if (enable) {
			tracing_start_cmdline_record();
			set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
		} else {
			tracing_stop_cmdline_record();
			clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
		}
	} while_for_each_event_file();
	mutex_unlock(&event_mutex);
}

static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
					 int enable, int soft_disable)
{
	struct ftrace_event_call *call = file->event_call;
	int ret = 0;
	int disable;

	switch (enable) {
	case 0:
		/*
		 * When soft_disable is set and enable is cleared, the sm_ref
		 * reference counter is decremented. If it reaches 0, we want
		 * to clear the SOFT_DISABLED flag but leave the event in the
		 * state that it was. That is, if the event was enabled and
		 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
		 * is set we do not want the event to be enabled before we
		 * clear the bit.
		 *
		 * When soft_disable is not set but the SOFT_MODE flag is,
		 * we do nothing. Do not disable the tracepoint, otherwise
		 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
		 */
		if (soft_disable) {
			if (atomic_dec_return(&file->sm_ref) > 0)
				break;
			disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED;
			clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
		} else
			disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE);

		if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) {
			clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);
			if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) {
				tracing_stop_cmdline_record();
				clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
			}
			call->class->reg(call, TRACE_REG_UNREGISTER, file);
		}
		/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
		if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
			set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
		else
			clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
		break;
	case 1:
		/*
		 * When soft_disable is set and enable is set, we want to
		 * register the tracepoint for the event, but leave the event
		 * as is. That means, if the event was already enabled, we do
		 * nothing (but set SOFT_MODE). If the event is disabled, we
		 * set SOFT_DISABLED before enabling the event tracepoint, so
		 * it still seems to be disabled.
		 */
		if (!soft_disable)
			clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
		else {
			if (atomic_inc_return(&file->sm_ref) > 1)
				break;
			set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
		}

		if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) {

			/* Keep the event disabled, when going to SOFT_MODE. */
			if (soft_disable)
				set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);

			if (trace_flags & TRACE_ITER_RECORD_CMD) {
				tracing_start_cmdline_record();
				set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
			}
			ret = call->class->reg(call, TRACE_REG_REGISTER, file);
			if (ret) {
				tracing_stop_cmdline_record();
				pr_info("event trace: Could not enable event "
					"%s\n", ftrace_event_name(call));
				break;
			}
			set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);

			/* WAS_ENABLED gets set but never cleared. */
			call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
		}
		break;
	}

	return ret;
}
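
/*
 * Note: the soft_disable path is what triggers and the enable_event
 * function probes below rely on. The tracepoint stays registered
 * (SOFT_MODE) while SOFT_DISABLED suppresses output, so the event can
 * later be flipped on without registering the tracepoint again.
 */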

int trace_event_enable_disable(struct ftrace_event_file *file,
			       int enable, int soft_disable)
{
	return __ftrace_event_enable_disable(file, enable, soft_disable);
}

static int ftrace_event_enable_disable(struct ftrace_event_file *file,
				       int enable)
{
	return __ftrace_event_enable_disable(file, enable, 0);
}

static void ftrace_clear_events(struct trace_array *tr)
{
	struct ftrace_event_file *file;

	mutex_lock(&event_mutex);
	list_for_each_entry(file, &tr->events, list) {
		ftrace_event_enable_disable(file, 0);
	}
	mutex_unlock(&event_mutex);
}

static void __put_system(struct event_subsystem *system)
{
	struct event_filter *filter = system->filter;

	WARN_ON_ONCE(system_refcount(system) == 0);
	if (system_refcount_dec(system))
		return;

	list_del(&system->list);

	if (filter) {
		kfree(filter->filter_string);
		kfree(filter);
	}
	if (system->ref_count & SYSTEM_FL_FREE_NAME)
		kfree(system->name);
	kfree(system);
}

static void __get_system(struct event_subsystem *system)
{
	WARN_ON_ONCE(system_refcount(system) == 0);
	system_refcount_inc(system);
}

static void __get_system_dir(struct ftrace_subsystem_dir *dir)
{
	WARN_ON_ONCE(dir->ref_count == 0);
	dir->ref_count++;
	__get_system(dir->subsystem);
}

static void __put_system_dir(struct ftrace_subsystem_dir *dir)
{
	WARN_ON_ONCE(dir->ref_count == 0);
	/* If the subsystem is about to be freed, the dir must be too */
	WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);

	__put_system(dir->subsystem);
	if (!--dir->ref_count)
		kfree(dir);
}

static void put_system(struct ftrace_subsystem_dir *dir)
{
	mutex_lock(&event_mutex);
	__put_system_dir(dir);
	mutex_unlock(&event_mutex);
}

static void remove_subsystem(struct ftrace_subsystem_dir *dir)
{
	if (!dir)
		return;

	if (!--dir->nr_events) {
		tracefs_remove_recursive(dir->entry);
		list_del(&dir->list);
		__put_system_dir(dir);
	}
}

static void remove_event_file_dir(struct ftrace_event_file *file)
{
	struct dentry *dir = file->dir;
	struct dentry *child;

	if (dir) {
		spin_lock(&dir->d_lock);	/* probably unneeded */
		list_for_each_entry(child, &dir->d_subdirs, d_child) {
			if (d_really_is_positive(child))	/* probably unneeded */
				d_inode(child)->i_private = NULL;
		}
		spin_unlock(&dir->d_lock);

		tracefs_remove_recursive(dir);
	}

	list_del(&file->list);
	remove_subsystem(file->system);
	free_event_filter(file->filter);
	kmem_cache_free(file_cachep, file);
}

/*
 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
 */
static int
__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
			      const char *sub, const char *event, int set)
{
	struct ftrace_event_file *file;
	struct ftrace_event_call *call;
	const char *name;
	int ret = -EINVAL;

	list_for_each_entry(file, &tr->events, list) {

		call = file->event_call;
		name = ftrace_event_name(call);

		if (!name || !call->class || !call->class->reg)
			continue;

		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
			continue;

		if (match &&
		    strcmp(match, name) != 0 &&
		    strcmp(match, call->class->system) != 0)
			continue;

		if (sub && strcmp(sub, call->class->system) != 0)
			continue;

		if (event && strcmp(event, name) != 0)
			continue;

		ftrace_event_enable_disable(file, set);

		ret = 0;
	}

	return ret;
}

static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
				  const char *sub, const char *event, int set)
{
	int ret;

	mutex_lock(&event_mutex);
	ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
	mutex_unlock(&event_mutex);

	return ret;
}

static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
{
	char *event = NULL, *sub = NULL, *match;
	int ret;

	/*
	 * The buf format can be <subsystem>:<event-name>
	 *  *:<event-name> means any event by that name.
	 *  :<event-name> is the same.
	 *
	 *  <subsystem>:* means all events in that subsystem
	 *  <subsystem>: means the same.
	 *
	 *  <name> (no ':') means all events in a subsystem with
	 *  the name <name> or any event that matches <name>
	 */

	match = strsep(&buf, ":");
	if (buf) {
		sub = match;
		event = buf;
		match = NULL;

		if (!strlen(sub) || strcmp(sub, "*") == 0)
			sub = NULL;
		if (!strlen(event) || strcmp(event, "*") == 0)
			event = NULL;
	}

	ret = __ftrace_set_clr_event(tr, match, sub, event, set);

	/* Put back the colon to allow this to be called again */
	if (buf)
		*(buf - 1) = ':';

	return ret;
}
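
/*
 * Illustrative strings accepted above, as written to set_event from
 * user space (e.g. "echo sched:sched_switch > set_event"):
 *
 *	"sched:sched_switch"	a single event
 *	"sched:"		all events of the sched subsystem
 *	"sched_switch"		any event (or subsystem) with that name
 *
 * A leading '!' disables instead; it is stripped by ftrace_event_write()
 * before this function is called.
 */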

/**
 * trace_set_clr_event - enable or disable an event
 * @system: system name to match (NULL for any system)
 * @event: event name to match (NULL for all events, within system)
 * @set: 1 to enable, 0 to disable
 *
 * This is a way for other parts of the kernel to enable or disable
 * event recording.
 *
 * Returns 0 on success, -EINVAL if the parameters do not match any
 * registered events.
 */
int trace_set_clr_event(const char *system, const char *event, int set)
{
	struct trace_array *tr = top_trace_array();

	if (!tr)
		return -ENODEV;

	return __ftrace_set_clr_event(tr, NULL, system, event, set);
}
EXPORT_SYMBOL_GPL(trace_set_clr_event);
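
/*
 * Illustrative in-kernel use:
 *
 *	trace_set_clr_event("sched", "sched_switch", 1);
 *
 * enables sched:sched_switch in the top level trace instance.
 */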

/* 128 should be much more than enough */
#define EVENT_BUF_SIZE		127

static ssize_t
ftrace_event_write(struct file *file, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	struct seq_file *m = file->private_data;
	struct trace_array *tr = m->private;
	ssize_t read, ret;

	if (!cnt)
		return 0;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);

	if (read >= 0 && trace_parser_loaded((&parser))) {
		int set = 1;

		if (*parser.buffer == '!')
			set = 0;

		parser.buffer[parser.idx] = 0;

		ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
		if (ret)
			goto out_put;
	}

	ret = read;

 out_put:
	trace_parser_put(&parser);

	return ret;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct ftrace_event_file *file = v;
	struct ftrace_event_call *call;
	struct trace_array *tr = m->private;

	(*pos)++;

	list_for_each_entry_continue(file, &tr->events, list) {
		call = file->event_call;
		/*
		 * The ftrace subsystem is for showing formats only.
		 * They can not be enabled or disabled via the event files.
		 */
		if (call->class && call->class->reg &&
		    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
			return file;
	}

	return NULL;
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct ftrace_event_file *file;
	struct trace_array *tr = m->private;
	loff_t l;

	mutex_lock(&event_mutex);

	file = list_entry(&tr->events, struct ftrace_event_file, list);
	for (l = 0; l <= *pos; ) {
		file = t_next(m, file, &l);
		if (!file)
			break;
	}
	return file;
}

static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct ftrace_event_file *file = v;
	struct trace_array *tr = m->private;

	(*pos)++;

	list_for_each_entry_continue(file, &tr->events, list) {
		if (file->flags & FTRACE_EVENT_FL_ENABLED)
			return file;
	}

	return NULL;
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct ftrace_event_file *file;
	struct trace_array *tr = m->private;
	loff_t l;

	mutex_lock(&event_mutex);

	file = list_entry(&tr->events, struct ftrace_event_file, list);
	for (l = 0; l <= *pos; ) {
		file = s_next(m, file, &l);
		if (!file)
			break;
	}
	return file;
}

static int t_show(struct seq_file *m, void *v)
{
	struct ftrace_event_file *file = v;
	struct ftrace_event_call *call = file->event_call;

	if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
		seq_printf(m, "%s:", call->class->system);
	seq_printf(m, "%s\n", ftrace_event_name(call));

	return 0;
}

static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&event_mutex);
}

static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	struct ftrace_event_file *file;
	unsigned long flags;
	char buf[4] = "0";

	mutex_lock(&event_mutex);
	file = event_file_data(filp);
	if (likely(file))
		flags = file->flags;
	mutex_unlock(&event_mutex);

	if (!file)
		return -ENODEV;

	if (flags & FTRACE_EVENT_FL_ENABLED &&
	    !(flags & FTRACE_EVENT_FL_SOFT_DISABLED))
		strcpy(buf, "1");

	if (flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
	    flags & FTRACE_EVENT_FL_SOFT_MODE)
		strcat(buf, "*");

	strcat(buf, "\n");

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
}
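
/*
 * Reading the per-event "enable" file thus yields "0", "1", "0*" or
 * "1*", where the '*' marks an event that is in soft mode (e.g. held
 * by a trigger).
 */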

static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	struct ftrace_event_file *file;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	switch (val) {
	case 0:
	case 1:
		ret = -ENODEV;
		mutex_lock(&event_mutex);
		file = event_file_data(filp);
		if (likely(file))
			ret = ftrace_event_enable_disable(file, val);
		mutex_unlock(&event_mutex);
		break;

	default:
		return -EINVAL;
	}

	*ppos += cnt;

	return ret ? ret : cnt;
}

static ssize_t
system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	const char set_to_char[4] = { '?', '0', '1', 'X' };
	struct ftrace_subsystem_dir *dir = filp->private_data;
	struct event_subsystem *system = dir->subsystem;
	struct ftrace_event_call *call;
	struct ftrace_event_file *file;
	struct trace_array *tr = dir->tr;
	char buf[2];
	int set = 0;
	int ret;

	mutex_lock(&event_mutex);
	list_for_each_entry(file, &tr->events, list) {
		call = file->event_call;
		if (!ftrace_event_name(call) || !call->class || !call->class->reg)
			continue;

		if (system && strcmp(call->class->system, system->name) != 0)
			continue;

		/*
		 * We need to find out if all the events are set
		 * or if all events are cleared, or if we have
		 * a mixture.
		 */
		set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED));

		/*
		 * If we have a mixture, no need to look further.
		 */
		if (set == 3)
			break;
	}
	mutex_unlock(&event_mutex);

	buf[0] = set_to_char[set];
	buf[1] = '\n';

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);

	return ret;
}
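
/*
 * The subsystem "enable" file therefore reads '1' (all events enabled),
 * '0' (all disabled) or 'X' (a mixture); '?' means no events matched.
 */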

static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
		    loff_t *ppos)
{
	struct ftrace_subsystem_dir *dir = filp->private_data;
	struct event_subsystem *system = dir->subsystem;
	const char *name = NULL;
	unsigned long val;
	ssize_t ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/*
	 * Opening of "enable" adds a ref count to system,
	 * so the name is safe to use.
	 */
	if (system)
		name = system->name;

	ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
	if (ret)
		goto out;

	ret = cnt;

out:
	*ppos += cnt;

	return ret;
}

enum {
	FORMAT_HEADER		= 1,
	FORMAT_FIELD_SEPERATOR	= 2,
	FORMAT_PRINTFMT		= 3,
};

static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct ftrace_event_call *call = event_file_data(m->private);
	struct list_head *common_head = &ftrace_common_fields;
	struct list_head *head = trace_get_fields(call);
	struct list_head *node = v;

	(*pos)++;

	switch ((unsigned long)v) {
	case FORMAT_HEADER:
		node = common_head;
		break;

	case FORMAT_FIELD_SEPERATOR:
		node = head;
		break;

	case FORMAT_PRINTFMT:
		/* all done */
		return NULL;
	}

	node = node->prev;
	if (node == common_head)
		return (void *)FORMAT_FIELD_SEPERATOR;
	else if (node == head)
		return (void *)FORMAT_PRINTFMT;
	else
		return node;
}

static int f_show(struct seq_file *m, void *v)
{
	struct ftrace_event_call *call = event_file_data(m->private);
	struct ftrace_event_field *field;
	const char *array_descriptor;

	switch ((unsigned long)v) {
	case FORMAT_HEADER:
		seq_printf(m, "name: %s\n", ftrace_event_name(call));
		seq_printf(m, "ID: %d\n", call->event.type);
		seq_puts(m, "format:\n");
		return 0;

	case FORMAT_FIELD_SEPERATOR:
		seq_putc(m, '\n');
		return 0;

	case FORMAT_PRINTFMT:
		seq_printf(m, "\nprint fmt: %s\n",
			   call->print_fmt);
		return 0;
	}

	field = list_entry(v, struct ftrace_event_field, link);
	/*
	 * Smartly shows the array type (except dynamic array).
	 * Normal:
	 *	field:TYPE VAR
	 * If TYPE := TYPE[LEN], it is shown:
	 *	field:TYPE VAR[LEN]
	 */
	array_descriptor = strchr(field->type, '[');

	if (!strncmp(field->type, "__data_loc", 10))
		array_descriptor = NULL;

	if (!array_descriptor)
		seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
			   field->type, field->name, field->offset,
			   field->size, !!field->is_signed);
	else
		seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
			   (int)(array_descriptor - field->type),
			   field->type, field->name,
			   array_descriptor, field->offset,
			   field->size, !!field->is_signed);

	return 0;
}
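
/*
 * The resulting "format" file looks roughly like this (all values are
 * illustrative):
 *
 *	name: sched_switch
 *	ID: 123
 *	format:
 *		field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *		...
 *
 *		field:char prev_comm[16];	offset:8;	size:16;	signed:1;
 *		...
 *
 *	print fmt: "prev_comm=%s ..."
 */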

static void *f_start(struct seq_file *m, loff_t *pos)
{
	void *p = (void *)FORMAT_HEADER;
	loff_t l = 0;

	/* ->stop() is called even if ->start() fails */
	mutex_lock(&event_mutex);
	if (!event_file_data(m->private))
		return ERR_PTR(-ENODEV);

	while (l < *pos && p)
		p = f_next(m, p, &l);

	return p;
}

static void f_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&event_mutex);
}

static const struct seq_operations trace_format_seq_ops = {
	.start		= f_start,
	.next		= f_next,
	.stop		= f_stop,
	.show		= f_show,
};

static int trace_format_open(struct inode *inode, struct file *file)
{
	struct seq_file *m;
	int ret;

	ret = seq_open(file, &trace_format_seq_ops);
	if (ret < 0)
		return ret;

	m = file->private_data;
	m->private = file;

	return 0;
}

static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	int id = (long)event_file_data(filp);
	char buf[32];
	int len;

	if (*ppos)
		return 0;

	if (unlikely(!id))
		return -ENODEV;

	len = sprintf(buf, "%d\n", id);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}

static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	struct ftrace_event_file *file;
	struct trace_seq *s;
	int r = -ENODEV;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	mutex_lock(&event_mutex);
	file = event_file_data(filp);
	if (file)
		print_event_filter(file, s);
	mutex_unlock(&event_mutex);

	if (file)
		r = simple_read_from_buffer(ubuf, cnt, ppos,
					    s->buffer, trace_seq_used(s));

	kfree(s);

	return r;
}

static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	struct ftrace_event_file *file;
	char *buf;
	int err = -ENODEV;

	if (cnt >= PAGE_SIZE)
		return -EINVAL;

	buf = (char *)__get_free_page(GFP_TEMPORARY);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, ubuf, cnt)) {
		free_page((unsigned long) buf);
		return -EFAULT;
	}
	buf[cnt] = '\0';

	mutex_lock(&event_mutex);
	file = event_file_data(filp);
	if (file)
		err = apply_event_filter(file, buf);
	mutex_unlock(&event_mutex);

	free_page((unsigned long) buf);
	if (err < 0)
		return err;

	*ppos += cnt;

	return cnt;
}

static LIST_HEAD(event_subsystems);

static int subsystem_open(struct inode *inode, struct file *filp)
{
	struct event_subsystem *system = NULL;
	struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */
	struct trace_array *tr;
	int ret;

	if (tracing_is_disabled())
		return -ENODEV;

	/* Make sure the system still exists */
	mutex_lock(&trace_types_lock);
	mutex_lock(&event_mutex);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		list_for_each_entry(dir, &tr->systems, list) {
			if (dir == inode->i_private) {
				/* Don't open systems with no events */
				if (dir->nr_events) {
					__get_system_dir(dir);
					system = dir->subsystem;
				}
				goto exit_loop;
			}
		}
	}
 exit_loop:
	mutex_unlock(&event_mutex);
	mutex_unlock(&trace_types_lock);

	if (!system)
		return -ENODEV;

	/* Some versions of gcc think dir can be uninitialized here */
	WARN_ON(!dir);

	/* Still need to increment the ref count of the system */
	if (trace_array_get(tr) < 0) {
		put_system(dir);
		return -ENODEV;
	}

	ret = tracing_open_generic(inode, filp);
	if (ret < 0) {
		trace_array_put(tr);
		put_system(dir);
	}

	return ret;
}

static int system_tr_open(struct inode *inode, struct file *filp)
{
	struct ftrace_subsystem_dir *dir;
	struct trace_array *tr = inode->i_private;
	int ret;

	if (tracing_is_disabled())
		return -ENODEV;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	/* Make a temporary dir that has no system but points to tr */
	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
	if (!dir) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	dir->tr = tr;

	ret = tracing_open_generic(inode, filp);
	if (ret < 0) {
		trace_array_put(tr);
		kfree(dir);
		return ret;
	}

	filp->private_data = dir;

	return 0;
}

static int subsystem_release(struct inode *inode, struct file *file)
{
	struct ftrace_subsystem_dir *dir = file->private_data;

	trace_array_put(dir->tr);

	/*
	 * If dir->subsystem is NULL, then this is a temporary
	 * descriptor that was made for a trace_array to enable
	 * all subsystems.
	 */
	if (dir->subsystem)
		put_system(dir);
	else
		kfree(dir);

	return 0;
}

static ssize_t
subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
		      loff_t *ppos)
{
	struct ftrace_subsystem_dir *dir = filp->private_data;
	struct event_subsystem *system = dir->subsystem;
	struct trace_seq *s;
	int r;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	print_subsystem_event_filter(system, s);
	r = simple_read_from_buffer(ubuf, cnt, ppos,
				    s->buffer, trace_seq_used(s));

	kfree(s);

	return r;
}

static ssize_t
subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct ftrace_subsystem_dir *dir = filp->private_data;
	char *buf;
	int err;

	if (cnt >= PAGE_SIZE)
		return -EINVAL;

	buf = (char *)__get_free_page(GFP_TEMPORARY);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, ubuf, cnt)) {
		free_page((unsigned long) buf);
		return -EFAULT;
	}
	buf[cnt] = '\0';

	err = apply_subsystem_event_filter(dir, buf);
	free_page((unsigned long) buf);
	if (err < 0)
		return err;

	*ppos += cnt;

	return cnt;
}

static ssize_t
show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	int (*func)(struct trace_seq *s) = filp->private_data;
	struct trace_seq *s;
	int r;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	func(s);
	r = simple_read_from_buffer(ubuf, cnt, ppos,
				    s->buffer, trace_seq_used(s));

	kfree(s);

	return r;
}

static int ftrace_event_avail_open(struct inode *inode, struct file *file);
static int ftrace_event_set_open(struct inode *inode, struct file *file);
static int ftrace_event_release(struct inode *inode, struct file *file);

static const struct seq_operations show_event_seq_ops = {
	.start = t_start,
	.next = t_next,
	.show = t_show,
	.stop = t_stop,
};

static const struct seq_operations show_set_event_seq_ops = {
	.start = s_start,
	.next = s_next,
	.show = t_show,
	.stop = t_stop,
};

static const struct file_operations ftrace_avail_fops = {
	.open = ftrace_event_avail_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static const struct file_operations ftrace_set_event_fops = {
	.open = ftrace_event_set_open,
	.read = seq_read,
	.write = ftrace_event_write,
	.llseek = seq_lseek,
	.release = ftrace_event_release,
};

static const struct file_operations ftrace_enable_fops = {
	.open = tracing_open_generic,
	.read = event_enable_read,
	.write = event_enable_write,
	.llseek = default_llseek,
};

static const struct file_operations ftrace_event_format_fops = {
	.open = trace_format_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static const struct file_operations ftrace_event_id_fops = {
	.read = event_id_read,
	.llseek = default_llseek,
};

static const struct file_operations ftrace_event_filter_fops = {
	.open = tracing_open_generic,
	.read = event_filter_read,
	.write = event_filter_write,
	.llseek = default_llseek,
};

static const struct file_operations ftrace_subsystem_filter_fops = {
	.open = subsystem_open,
	.read = subsystem_filter_read,
	.write = subsystem_filter_write,
	.llseek = default_llseek,
	.release = subsystem_release,
};

static const struct file_operations ftrace_system_enable_fops = {
	.open = subsystem_open,
	.read = system_enable_read,
	.write = system_enable_write,
	.llseek = default_llseek,
	.release = subsystem_release,
};

static const struct file_operations ftrace_tr_enable_fops = {
	.open = system_tr_open,
	.read = system_enable_read,
	.write = system_enable_write,
	.llseek = default_llseek,
	.release = subsystem_release,
};

static const struct file_operations ftrace_show_header_fops = {
	.open = tracing_open_generic,
	.read = show_header,
	.llseek = default_llseek,
};

static int
ftrace_event_open(struct inode *inode, struct file *file,
		  const struct seq_operations *seq_ops)
{
	struct seq_file *m;
	int ret;

	ret = seq_open(file, seq_ops);
	if (ret < 0)
		return ret;
	m = file->private_data;
	/* copy tr over to seq ops */
	m->private = inode->i_private;

	return ret;
}

static int ftrace_event_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	return seq_release(inode, file);
}

static int
ftrace_event_avail_open(struct inode *inode, struct file *file)
{
	const struct seq_operations *seq_ops = &show_event_seq_ops;

	return ftrace_event_open(inode, file, seq_ops);
}

static int
ftrace_event_set_open(struct inode *inode, struct file *file)
{
	const struct seq_operations *seq_ops = &show_set_event_seq_ops;
	struct trace_array *tr = inode->i_private;
	int ret;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	if ((file->f_mode & FMODE_WRITE) &&
	    (file->f_flags & O_TRUNC))
		ftrace_clear_events(tr);

	ret = ftrace_event_open(inode, file, seq_ops);
	if (ret < 0)
		trace_array_put(tr);
	return ret;
}

static struct event_subsystem *
create_new_subsystem(const char *name)
{
	struct event_subsystem *system;

	/* need to create new entry */
	system = kmalloc(sizeof(*system), GFP_KERNEL);
	if (!system)
		return NULL;

	system->ref_count = 1;

	/* Only allocate if dynamic (kprobes and modules) */
	if (!core_kernel_data((unsigned long)name)) {
		system->ref_count |= SYSTEM_FL_FREE_NAME;
		system->name = kstrdup(name, GFP_KERNEL);
		if (!system->name)
			goto out_free;
	} else
		system->name = name;

	system->filter = NULL;

	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
	if (!system->filter)
		goto out_free;

	list_add(&system->list, &event_subsystems);

	return system;

 out_free:
	if (system->ref_count & SYSTEM_FL_FREE_NAME)
		kfree(system->name);
	kfree(system);
	return NULL;
}

static struct dentry *
event_subsystem_dir(struct trace_array *tr, const char *name,
		    struct ftrace_event_file *file, struct dentry *parent)
{
	struct ftrace_subsystem_dir *dir;
	struct event_subsystem *system;
	struct dentry *entry;

	/* First see if we did not already create this dir */
	list_for_each_entry(dir, &tr->systems, list) {
		system = dir->subsystem;
		if (strcmp(system->name, name) == 0) {
			dir->nr_events++;
			file->system = dir;
			return dir->entry;
		}
	}

	/* Now see if the system itself exists. */
	list_for_each_entry(system, &event_subsystems, list) {
		if (strcmp(system->name, name) == 0)
			break;
	}
	/* Reset system variable when not found */
	if (&system->list == &event_subsystems)
		system = NULL;

	dir = kmalloc(sizeof(*dir), GFP_KERNEL);
	if (!dir)
		goto out_fail;

	if (!system) {
		system = create_new_subsystem(name);
		if (!system)
			goto out_free;
	} else
		__get_system(system);

	dir->entry = tracefs_create_dir(name, parent);
	if (!dir->entry) {
		pr_warn("Failed to create system directory %s\n", name);
		__put_system(system);
		goto out_free;
	}

	dir->tr = tr;
	dir->ref_count = 1;
	dir->nr_events = 1;
	dir->subsystem = system;
	file->system = dir;

	entry = tracefs_create_file("filter", 0644, dir->entry, dir,
				    &ftrace_subsystem_filter_fops);
	if (!entry) {
		kfree(system->filter);
		system->filter = NULL;
		pr_warn("Could not create tracefs '%s/filter' entry\n", name);
	}

	trace_create_file("enable", 0644, dir->entry, dir,
			  &ftrace_system_enable_fops);

	list_add(&dir->list, &tr->systems);

	return dir->entry;

 out_free:
	kfree(dir);
 out_fail:
	/* Only print this message if failed on memory allocation */
	if (!dir || !system)
		pr_warn("No memory to create event subsystem %s\n", name);
	return NULL;
}

static int
event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
{
	struct ftrace_event_call *call = file->event_call;
	struct trace_array *tr = file->tr;
	struct list_head *head;
	struct dentry *d_events;
	const char *name;
	int ret;

	/*
	 * If the trace point header did not define TRACE_SYSTEM
	 * then the system would be called "TRACE_SYSTEM".
	 */
	if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
		d_events = event_subsystem_dir(tr, call->class->system, file, parent);
		if (!d_events)
			return -ENOMEM;
	} else
		d_events = parent;

	name = ftrace_event_name(call);
	file->dir = tracefs_create_dir(name, d_events);
	if (!file->dir) {
		pr_warn("Could not create tracefs '%s' directory\n", name);
		return -1;
	}

	if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
		trace_create_file("enable", 0644, file->dir, file,
				  &ftrace_enable_fops);

#ifdef CONFIG_PERF_EVENTS
	if (call->event.type && call->class->reg)
		trace_create_file("id", 0444, file->dir,
				  (void *)(long)call->event.type,
				  &ftrace_event_id_fops);
#endif

	/*
	 * Other events may have the same class. Only update
	 * the fields if they are not already defined.
	 */
	head = trace_get_fields(call);
	if (list_empty(head)) {
		ret = call->class->define_fields(call);
		if (ret < 0) {
			pr_warn("Could not initialize trace point events/%s\n",
				name);
			return -1;
		}
	}
	trace_create_file("filter", 0644, file->dir, file,
			  &ftrace_event_filter_fops);

	trace_create_file("trigger", 0644, file->dir, file,
			  &event_trigger_fops);

	trace_create_file("format", 0444, file->dir, call,
			  &ftrace_event_format_fops);

	return 0;
}

static void remove_event_from_tracers(struct ftrace_event_call *call)
{
	struct ftrace_event_file *file;
	struct trace_array *tr;

	do_for_each_event_file_safe(tr, file) {
		if (file->event_call != call)
			continue;

		remove_event_file_dir(file);
		/*
		 * The do_for_each_event_file_safe() is
		 * a double loop. After finding the call for this
		 * trace_array, we use break to jump to the next
		 * trace_array.
		 */
		break;
	} while_for_each_event_file();
}

static void event_remove(struct ftrace_event_call *call)
{
	struct trace_array *tr;
	struct ftrace_event_file *file;

	do_for_each_event_file(tr, file) {
		if (file->event_call != call)
			continue;
		ftrace_event_enable_disable(file, 0);
		/*
		 * The do_for_each_event_file() is
		 * a double loop. After finding the call for this
		 * trace_array, we use break to jump to the next
		 * trace_array.
		 */
		break;
	} while_for_each_event_file();

	if (call->event.funcs)
		__unregister_ftrace_event(&call->event);
	remove_event_from_tracers(call);
	list_del(&call->list);
}

static int event_init(struct ftrace_event_call *call)
{
	int ret = 0;
	const char *name;

	name = ftrace_event_name(call);
	if (WARN_ON(!name))
		return -EINVAL;

	if (call->class->raw_init) {
		ret = call->class->raw_init(call);
		if (ret < 0 && ret != -ENOSYS)
			pr_warn("Could not initialize trace events/%s\n", name);
	}

	return ret;
}

static int
__register_event(struct ftrace_event_call *call, struct module *mod)
{
	int ret;

	ret = event_init(call);
	if (ret < 0)
		return ret;

	list_add(&call->list, &ftrace_events);
	call->mod = mod;

	return 0;
}

static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
{
	int rlen;
	int elen;

	/* Find the length of the enum value as a string */
	elen = snprintf(ptr, 0, "%ld", map->enum_value);
	/* Make sure there's enough room to replace the string with the value */
	if (len < elen)
		return NULL;

	snprintf(ptr, elen + 1, "%ld", map->enum_value);

	/* Get the rest of the string of ptr */
	rlen = strlen(ptr + len);
	memmove(ptr + elen, ptr + len, rlen);
	/* Make sure we end the new string */
	ptr[elen + rlen] = 0;

	return ptr + elen;
}
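
/*
 * Illustrative example: with map->enum_string "ZONE_NORMAL" and
 * map->enum_value 2, a print_fmt fragment "REC->idx == ZONE_NORMAL"
 * is rewritten in place to "REC->idx == 2" by update_event_printk()
 * below, which uses enum_replace() to splice in the value.
 */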

static void update_event_printk(struct ftrace_event_call *call,
				struct trace_enum_map *map)
{
	char *ptr;
	int quote = 0;
	int len = strlen(map->enum_string);

	for (ptr = call->print_fmt; *ptr; ptr++) {
		if (*ptr == '\\') {
			ptr++;
			/* paranoid */
			if (!*ptr)
				break;
			continue;
		}
		if (*ptr == '"') {
			quote ^= 1;
			continue;
		}
		if (quote)
			continue;
		if (isdigit(*ptr)) {
			/* skip numbers */
			do {
				ptr++;
				/* Check for alpha chars like ULL */
			} while (isalnum(*ptr));
			if (!*ptr)
				break;
			/*
			 * A number must have some kind of delimiter after
			 * it, and we can ignore that too.
			 */
			continue;
		}
		if (isalpha(*ptr) || *ptr == '_') {
			if (strncmp(map->enum_string, ptr, len) == 0 &&
			    !isalnum(ptr[len]) && ptr[len] != '_') {
				ptr = enum_replace(ptr, map, len);
				/* Hmm, enum string smaller than value */
				if (WARN_ON_ONCE(!ptr))
					return;
				/*
				 * No need to decrement here, as enum_replace()
				 * returns the pointer to the character passed
				 * the enum, and two enums can not be placed
				 * back to back without something in between.
				 * We can skip that something in between.
				 */
				continue;
			}
		skip_more:
			do {
				ptr++;
			} while (isalnum(*ptr) || *ptr == '_');
			if (!*ptr)
				break;
			/*
			 * If what comes after this variable is a '.' or
			 * '->' then we can continue to ignore that string.
			 */
			if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
				ptr += *ptr == '.' ? 1 : 2;
				if (!*ptr)
					break;
				goto skip_more;
			}
			/*
			 * Once again, we can skip the delimiter that came
			 * after the string.
			 */
			continue;
		}
	}
}

void trace_event_enum_update(struct trace_enum_map **map, int len)
{
	struct ftrace_event_call *call, *p;
	const char *last_system = NULL;
	int last_i;
	int i;

	down_write(&trace_event_sem);
	list_for_each_entry_safe(call, p, &ftrace_events, list) {
		/* events are usually grouped together with systems */
		if (!last_system || call->class->system != last_system) {
			last_i = 0;
			last_system = call->class->system;
		}

		for (i = last_i; i < len; i++) {
			if (call->class->system == map[i]->system) {
				/* Save the first system if need be */
				if (!last_i)
					last_i = i;
				update_event_printk(call, map[i]);
			}
		}
	}
	up_write(&trace_event_sem);
}

static struct ftrace_event_file *
trace_create_new_event(struct ftrace_event_call *call,
		       struct trace_array *tr)
{
	struct ftrace_event_file *file;

	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
	if (!file)
		return NULL;

	file->event_call = call;
	file->tr = tr;
	atomic_set(&file->sm_ref, 0);
	atomic_set(&file->tm_ref, 0);
	INIT_LIST_HEAD(&file->triggers);
	list_add(&file->list, &tr->events);

	return file;
}

/* Add an event to a trace directory */
static int
__trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr)
{
	struct ftrace_event_file *file;

	file = trace_create_new_event(call, tr);
	if (!file)
		return -ENOMEM;

	return event_create_dir(tr->event_dir, file);
}

/*
 * Just create a descriptor for early init. A descriptor is required
 * for enabling events at boot. We want to enable events before
 * the filesystem is initialized.
 */
static __init int
__trace_early_add_new_event(struct ftrace_event_call *call,
			    struct trace_array *tr)
{
	struct ftrace_event_file *file;

	file = trace_create_new_event(call, tr);
	if (!file)
		return -ENOMEM;

	return 0;
}

struct ftrace_module_file_ops;
static void __add_event_to_tracers(struct ftrace_event_call *call);

/* Add an additional event_call dynamically */
int trace_add_event_call(struct ftrace_event_call *call)
{
	int ret;
	mutex_lock(&trace_types_lock);
	mutex_lock(&event_mutex);

	ret = __register_event(call, NULL);
	if (ret >= 0)
		__add_event_to_tracers(call);

	mutex_unlock(&event_mutex);
	mutex_unlock(&trace_types_lock);
	return ret;
}

/*
 * Must be called under locking of trace_types_lock, event_mutex and
 * trace_event_sem.
 */
static void __trace_remove_event_call(struct ftrace_event_call *call)
{
	event_remove(call);
	trace_destroy_fields(call);
	free_event_filter(call->filter);
	call->filter = NULL;
}

static int probe_remove_event_call(struct ftrace_event_call *call)
{
	struct trace_array *tr;
	struct ftrace_event_file *file;

#ifdef CONFIG_PERF_EVENTS
	if (call->perf_refcount)
		return -EBUSY;
#endif
	do_for_each_event_file(tr, file) {
		if (file->event_call != call)
			continue;
		/*
		 * We can't rely on ftrace_event_enable_disable(enable => 0)
		 * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress
		 * TRACE_REG_UNREGISTER.
		 */
		if (file->flags & FTRACE_EVENT_FL_ENABLED)
			return -EBUSY;
		/*
		 * The do_for_each_event_file_safe() is
		 * a double loop. After finding the call for this
		 * trace_array, we use break to jump to the next
		 * trace_array.
		 */
		break;
	} while_for_each_event_file();

	__trace_remove_event_call(call);

	return 0;
}

/* Remove an event_call */
int trace_remove_event_call(struct ftrace_event_call *call)
{
	int ret;

	mutex_lock(&trace_types_lock);
	mutex_lock(&event_mutex);
	down_write(&trace_event_sem);
	ret = probe_remove_event_call(call);
	up_write(&trace_event_sem);
	mutex_unlock(&event_mutex);
	mutex_unlock(&trace_types_lock);

	return ret;
}

#define for_each_event(event, start, end)			\
	for (event = start;					\
	     (unsigned long)event < (unsigned long)end;		\
	     event++)

#ifdef CONFIG_MODULES

static void trace_module_add_events(struct module *mod)
{
	struct ftrace_event_call **call, **start, **end;

	if (!mod->num_trace_events)
		return;

	/* Don't add infrastructure for mods without tracepoints */
	if (trace_module_has_bad_taint(mod)) {
		pr_err("%s: module has bad taint, not creating trace events\n",
		       mod->name);
		return;
	}

	start = mod->trace_events;
	end = mod->trace_events + mod->num_trace_events;

	for_each_event(call, start, end) {
		__register_event(*call, mod);
		__add_event_to_tracers(*call);
	}
}

static void trace_module_remove_events(struct module *mod)
{
	struct ftrace_event_call *call, *p;
	bool clear_trace = false;

	down_write(&trace_event_sem);
	list_for_each_entry_safe(call, p, &ftrace_events, list) {
		if (call->mod == mod) {
			if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
				clear_trace = true;
			__trace_remove_event_call(call);
		}
	}
	up_write(&trace_event_sem);

	/*
	 * It is safest to reset the ring buffer if the module being unloaded
	 * registered any events that were used. The only worry is if
	 * a new module gets loaded, and takes on the same id as the events
	 * of this module. When printing out the buffer, traced events left
	 * over from this module may be passed to the new module events and
	 * unexpected results may occur.
	 */
	if (clear_trace)
		tracing_reset_all_online_cpus();
}

static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	mutex_lock(&trace_types_lock);
	mutex_lock(&event_mutex);
	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_events(mod);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_events(mod);
		break;
	}
	mutex_unlock(&event_mutex);
	mutex_unlock(&trace_types_lock);

	return 0;
}

static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 1, /* higher than trace.c module notify */
};
#endif /* CONFIG_MODULES */

/* Create a new event directory structure for a trace directory. */
static void
__trace_add_event_dirs(struct trace_array *tr)
{
	struct ftrace_event_call *call;
	int ret;

	list_for_each_entry(call, &ftrace_events, list) {
		ret = __trace_add_new_event(call, tr);
		if (ret < 0)
			pr_warn("Could not create directory for event %s\n",
				ftrace_event_name(call));
	}
}

struct ftrace_event_file *
find_event_file(struct trace_array *tr, const char *system,  const char *event)
{
	struct ftrace_event_file *file;
	struct ftrace_event_call *call;
	const char *name;

	list_for_each_entry(file, &tr->events, list) {

		call = file->event_call;
		name = ftrace_event_name(call);

		if (!name || !call->class || !call->class->reg)
			continue;

		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
			continue;

		if (strcmp(event, name) == 0 &&
		    strcmp(system, call->class->system) == 0)
			return file;
	}
	return NULL;
}

#ifdef CONFIG_DYNAMIC_FTRACE

/* Avoid typos */
#define ENABLE_EVENT_STR	"enable_event"
#define DISABLE_EVENT_STR	"disable_event"

struct event_probe_data {
	struct ftrace_event_file	*file;
	unsigned long			count;
	int				ref;
	bool				enable;
};

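/*
 * Function-probe callbacks for the enable_event/disable_event commands.
 * They do not call the normal enable/disable paths from the probe (which
 * runs in function-trace context); they just flip the event file's
 * SOFT_DISABLED bit, while the SOFT_MODE reference taken at registration
 * time keeps the event's infrastructure alive.
 */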
static void
event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
{
	struct event_probe_data **pdata = (struct event_probe_data **)_data;
	struct event_probe_data *data = *pdata;

	if (!data)
		return;

	if (data->enable)
		clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags);
	else
		set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags);
}

static void
event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data)
{
	struct event_probe_data **pdata = (struct event_probe_data **)_data;
	struct event_probe_data *data = *pdata;

	if (!data)
		return;

	if (!data->count)
		return;

	/* Skip if the event is already in the state we want to switch it to */
	if (data->enable == !(data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
		return;

	if (data->count != -1)
		(data->count)--;

	event_enable_probe(ip, parent_ip, _data);
}

static int
event_enable_print(struct seq_file *m, unsigned long ip,
		      struct ftrace_probe_ops *ops, void *_data)
{
	struct event_probe_data *data = _data;

	seq_printf(m, "%ps:", (void *)ip);

	seq_printf(m, "%s:%s:%s",
		   data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
		   data->file->event_call->class->system,
		   ftrace_event_name(data->file->event_call));

	if (data->count == -1)
		seq_puts(m, ":unlimited\n");
	else
		seq_printf(m, ":count=%ld\n", data->count);

	return 0;
}

static int
event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip,
		  void **_data)
{
	struct event_probe_data **pdata = (struct event_probe_data **)_data;
	struct event_probe_data *data = *pdata;

	data->ref++;
	return 0;
}

static void
event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip,
		  void **_data)
{
	struct event_probe_data **pdata = (struct event_probe_data **)_data;
	struct event_probe_data *data = *pdata;

	if (WARN_ON_ONCE(data->ref <= 0))
		return;

	data->ref--;
	if (!data->ref) {
		/* Remove the SOFT_MODE flag */
		__ftrace_event_enable_disable(data->file, 0, 1);
		module_put(data->file->event_call->mod);
		kfree(data);
	}
	*pdata = NULL;
}

static struct ftrace_probe_ops event_enable_probe_ops = {
	.func			= event_enable_probe,
	.print			= event_enable_print,
	.init			= event_enable_init,
	.free			= event_enable_free,
};

static struct ftrace_probe_ops event_enable_count_probe_ops = {
	.func			= event_enable_count_probe,
	.print			= event_enable_print,
	.init			= event_enable_init,
	.free			= event_enable_free,
};

static struct ftrace_probe_ops event_disable_probe_ops = {
	.func			= event_enable_probe,
	.print			= event_enable_print,
	.init			= event_enable_init,
	.free			= event_enable_free,
};

static struct ftrace_probe_ops event_disable_count_probe_ops = {
	.func			= event_enable_count_probe,
	.print			= event_enable_print,
	.init			= event_enable_init,
	.free			= event_enable_free,
};

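/*
 * Parse and register an enable_event/disable_event function command
 * written to set_ftrace_filter. Illustrative usage (the path and event
 * names depend on the running kernel):
 *
 *   # echo 'schedule:enable_event:sched:sched_switch:2' > \
 *		/sys/kernel/debug/tracing/set_ftrace_filter
 *
 * arms a probe on schedule() that soft-enables sched:sched_switch for
 * the next two hits; prefixing the command with '!' removes the probe.
 */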
static int
event_enable_func(struct ftrace_hash *hash,
		  char *glob, char *cmd, char *param, int enabled)
{
	struct trace_array *tr = top_trace_array();
	struct ftrace_event_file *file;
	struct ftrace_probe_ops *ops;
	struct event_probe_data *data;
	const char *system;
	const char *event;
	char *number;
	bool enable;
	int ret;

	if (!tr)
		return -ENODEV;

	/* hash funcs only work with set_ftrace_filter */
	if (!enabled || !param)
		return -EINVAL;

	system = strsep(&param, ":");
	if (!param)
		return -EINVAL;

	event = strsep(&param, ":");

	mutex_lock(&event_mutex);

	ret = -EINVAL;
	file = find_event_file(tr, system, event);
	if (!file)
		goto out;

	enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;

	if (enable)
		ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
	else
		ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;

	if (glob[0] == '!') {
		unregister_ftrace_function_probe_func(glob+1, ops);
		ret = 0;
		goto out;
	}

	ret = -ENOMEM;
	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		goto out;

	data->enable = enable;
	data->count = -1;
	data->file = file;

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	ret = -EINVAL;
	if (!strlen(number))
		goto out_free;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, &data->count);
	if (ret)
		goto out_free;

 out_reg:
	/* Don't let event modules unload while probe registered */
	ret = try_module_get(file->event_call->mod);
	if (!ret) {
		ret = -EBUSY;
		goto out_free;
	}

	ret = __ftrace_event_enable_disable(file, 1, 1);
	if (ret < 0)
		goto out_put;
	ret = register_ftrace_function_probe(glob, ops, data);
	/*
	 * On success the above returns the number of functions enabled,
	 * but if it did not find any functions it returns zero.
	 * Consider finding no functions a failure too.
	 */
	if (!ret) {
		ret = -ENOENT;
		goto out_disable;
	} else if (ret < 0)
		goto out_disable;
	/* Just return zero, not the number of enabled functions */
	ret = 0;
 out:
	mutex_unlock(&event_mutex);
	return ret;

 out_disable:
	__ftrace_event_enable_disable(file, 0, 1);
 out_put:
	module_put(file->event_call->mod);
 out_free:
	kfree(data);
	goto out;
}

static struct ftrace_func_command event_enable_cmd = {
	.name			= ENABLE_EVENT_STR,
	.func			= event_enable_func,
};

static struct ftrace_func_command event_disable_cmd = {
	.name			= DISABLE_EVENT_STR,
	.func			= event_enable_func,
};

static __init int register_event_cmds(void)
{
	int ret;

	ret = register_ftrace_command(&event_enable_cmd);
	if (WARN_ON(ret < 0))
		return ret;
	ret = register_ftrace_command(&event_disable_cmd);
	if (WARN_ON(ret < 0))
		unregister_ftrace_command(&event_enable_cmd);
	return ret;
}
#else
static inline int register_event_cmds(void) { return 0; }
#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * The top level array has already had its ftrace_event_file
 * descriptors created in order to allow for early events to
 * be recorded. This function is called after tracefs has been
 * initialized, and we now have to create the files associated
 * with the events.
 */
static __init void
__trace_early_add_event_dirs(struct trace_array *tr)
{
	struct ftrace_event_file *file;
	int ret;

	list_for_each_entry(file, &tr->events, list) {
		ret = event_create_dir(tr->event_dir, file);
		if (ret < 0)
			pr_warn("Could not create directory for event %s\n",
				ftrace_event_name(file->event_call));
	}
}

/*
 * For early boot up, the top trace array needs to have
 * a list of events that can be enabled. This must be done before
 * the filesystem is set up in order to allow events to be traced
 * early.
 */
static __init void
__trace_early_add_events(struct trace_array *tr)
{
	struct ftrace_event_call *call;
	int ret;

	list_for_each_entry(call, &ftrace_events, list) {
		/* Early boot up should not have any modules loaded */
		if (WARN_ON_ONCE(call->mod))
			continue;

		ret = __trace_early_add_new_event(call, tr);
		if (ret < 0)
			pr_warn("Could not create early event %s\n",
				ftrace_event_name(call));
	}
}

/* Remove the event directory structure for a trace directory. */
static void
__trace_remove_event_dirs(struct trace_array *tr)
{
	struct ftrace_event_file *file, *next;

	list_for_each_entry_safe(file, next, &tr->events, list)
		remove_event_file_dir(file);
}

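/* Add the event to every trace instance that currently exists. */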
static void __add_event_to_tracers(struct ftrace_event_call *call)
{
	struct trace_array *tr;

	list_for_each_entry(tr, &ftrace_trace_arrays, list)
		__trace_add_new_event(call, tr);
}

extern struct ftrace_event_call *__start_ftrace_events[];
extern struct ftrace_event_call *__stop_ftrace_events[];

static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;

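/*
 * Handle the "trace_event=" boot parameter: the comma-separated event
 * list is stashed in bootup_event_buf and enabled later by
 * early_enable_events(). For example (illustrative event names):
 *
 *   trace_event=sched:sched_switch,irq:irq_handler_entry
 */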
static __init int setup_trace_event(char *str)
{
	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
	ring_buffer_expanded = true;
	tracing_selftest_disabled = true;

	return 1;
}
__setup("trace_event=", setup_trace_event);

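/*
 * Create the per-instance top level event control files: "set_event",
 * the "events" directory, the ring buffer format files and the global
 * "enable" switch. A rough example of using set_event from user space:
 *
 *   # echo sched:sched_switch > /sys/kernel/debug/tracing/set_event
 */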
/* Expects to have event_mutex held when called */
static int
create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
{
	struct dentry *d_events;
	struct dentry *entry;

	entry = tracefs_create_file("set_event", 0644, parent,
				    tr, &ftrace_set_event_fops);
	if (!entry) {
		pr_warn("Could not create tracefs 'set_event' entry\n");
		return -ENOMEM;
	}

	d_events = tracefs_create_dir("events", parent);
	if (!d_events) {
		pr_warn("Could not create tracefs 'events' directory\n");
		return -ENOMEM;
	}

	/* ring buffer internal formats */
	trace_create_file("header_page", 0444, d_events,
			  ring_buffer_print_page_header,
			  &ftrace_show_header_fops);

	trace_create_file("header_event", 0444, d_events,
			  ring_buffer_print_entry_header,
			  &ftrace_show_header_fops);

	trace_create_file("enable", 0644, d_events,
			  tr, &ftrace_tr_enable_fops);

	tr->event_dir = d_events;

	return 0;
}

/**
 * event_trace_add_tracer - add an instance of a trace_array to events
 * @parent: The parent dentry to place the files/directories for events in
 * @tr: The trace array associated with these events
 *
 * When a new instance is created, it needs to set up its events
 * directory, as well as other files associated with events. It also
 * creates the event hierarchy in the @parent/events directory.
 *
 * Returns 0 on success.
 */
int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
{
	int ret;

	mutex_lock(&event_mutex);

	ret = create_event_toplevel_files(parent, tr);
	if (ret)
		goto out_unlock;

	down_write(&trace_event_sem);
	__trace_add_event_dirs(tr);
	up_write(&trace_event_sem);

 out_unlock:
	mutex_unlock(&event_mutex);

	return ret;
}

/*
 * The top trace array already had its ftrace_event_file descriptors
 * created. Now the tracefs files themselves need to be created.
 */
static __init int
early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
{
	int ret;

	mutex_lock(&event_mutex);

	ret = create_event_toplevel_files(parent, tr);
	if (ret)
		goto out_unlock;

	down_write(&trace_event_sem);
	__trace_early_add_event_dirs(tr);
	up_write(&trace_event_sem);

 out_unlock:
	mutex_unlock(&event_mutex);

	return ret;
}

int event_trace_del_tracer(struct trace_array *tr)
{
	mutex_lock(&event_mutex);

	/* Disable any event triggers and associated soft-disabled events */
	clear_event_triggers(tr);

	/* Disable any running events */
	__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);

	/* Access to events is within rcu_read_lock_sched() */
	synchronize_sched();

	down_write(&trace_event_sem);
	__trace_remove_event_dirs(tr);
	tracefs_remove_recursive(tr->event_dir);
	up_write(&trace_event_sem);

	tr->event_dir = NULL;

	mutex_unlock(&event_mutex);

	return 0;
}

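/* Set up the slab caches used for event field and event file structures. */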
static __init int event_trace_memsetup(void)
{
	field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
	file_cachep = KMEM_CACHE(ftrace_event_file, SLAB_PANIC);
	return 0;
}

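/*
 * Walk the comma-separated list saved from the trace_event= boot
 * parameter and enable each event via ftrace_set_clr_event(). When
 * @disable_first is set, each event is disabled first, which is needed
 * when re-running the list for events (such as syscalls) that could not
 * start on the first pass.
 */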
static __init void
early_enable_events(struct trace_array *tr, bool disable_first)
{
	char *buf = bootup_event_buf;
	char *token;
	int ret;

	while (true) {
		token = strsep(&buf, ",");

		if (!token)
			break;
		if (!*token)
			continue;

		/* Restarting syscalls requires that we stop them first */
		if (disable_first)
			ftrace_set_clr_event(tr, token, 0);

		ret = ftrace_set_clr_event(tr, token, 1);
		if (ret)
			pr_warn("Failed to enable trace event: %s\n", token);

		/* Put back the comma to allow this to be called again */
		if (buf)
			*(buf - 1) = ',';
	}
}

static __init int event_trace_enable(void)
{
	struct trace_array *tr = top_trace_array();
	struct ftrace_event_call **iter, *call;
	int ret;

	if (!tr)
		return -ENODEV;

	for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {

		call = *iter;
		ret = event_init(call);
		if (!ret)
			list_add(&call->list, &ftrace_events);
	}

	/*
	 * We need the top trace array to have a working set of trace
	 * points at early init, before the debug files and directories
	 * are created. Create the file entries now, and attach them
	 * to the actual file dentries later.
	 */
	__trace_early_add_events(tr);

	early_enable_events(tr, false);

	trace_printk_start_comm();

	register_event_cmds();

	register_trigger_cmds();

	return 0;
}

/*
 * event_trace_enable() is called from trace_event_init() first to
 * initialize events and perhaps start any events that are on the
 * command line. Unfortunately, there are some events that will not
 * start this early, like the system call tracepoints that need
 * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
 * is called before pid 1 starts, so the flag is never set and the
 * syscall tracepoints are never reached, even though the event is
 * enabled regardless (and does nothing).
 */
static __init int event_trace_enable_again(void)
{
	struct trace_array *tr;

	tr = top_trace_array();
	if (!tr)
		return -ENODEV;

	early_enable_events(tr, true);

	return 0;
}

early_initcall(event_trace_enable_again);

static __init int event_trace_init(void)
{
	struct trace_array *tr;
	struct dentry *d_tracer;
	struct dentry *entry;
	int ret;

	tr = top_trace_array();
	if (!tr)
		return -ENODEV;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	entry = tracefs_create_file("available_events", 0444, d_tracer,
				    tr, &ftrace_avail_fops);
	if (!entry)
		pr_warn("Could not create tracefs 'available_events' entry\n");

	if (trace_define_common_fields())
		pr_warn("tracing: Failed to allocate common fields\n");

	ret = early_event_add_tracer(d_tracer, tr);
	if (ret)
		return ret;

#ifdef CONFIG_MODULES
	ret = register_module_notifier(&trace_module_nb);
	if (ret)
		pr_warn("Failed to register trace events module notifier\n");
#endif
	return 0;
}

void __init trace_event_init(void)
{
	event_trace_memsetup();
	init_ftrace_syscalls();
	event_trace_enable();
}

fs_initcall(event_trace_init);

#ifdef CONFIG_FTRACE_STARTUP_TEST

static DEFINE_SPINLOCK(test_spinlock);
static DEFINE_SPINLOCK(test_spinlock_irq);
static DEFINE_MUTEX(test_mutex);

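/*
 * Self-test helpers: exercise spinlock, irq-disabled, mutex, allocation
 * and scheduling paths so that any tracepoints hooked into those paths
 * get a chance to fire while the tests run.
 */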
static __init void test_work(struct work_struct *dummy)
{
	spin_lock(&test_spinlock);
	spin_lock_irq(&test_spinlock_irq);
	udelay(1);
	spin_unlock_irq(&test_spinlock_irq);
	spin_unlock(&test_spinlock);

	mutex_lock(&test_mutex);
	msleep(1);
	mutex_unlock(&test_mutex);
}

static __init int event_test_thread(void *unused)
{
	void *test_malloc;

	test_malloc = kmalloc(1234, GFP_KERNEL);
	if (!test_malloc)
		pr_info("failed to kmalloc\n");

	schedule_on_each_cpu(test_work);

	kfree(test_malloc);

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}

/*
 * Do various things that may trigger events.
 */
static __init void event_test_stuff(void)
{
	struct task_struct *test_thread;

	test_thread = kthread_run(event_test_thread, NULL, "test-events");
	msleep(1);
	kthread_stop(test_thread);
}

/*
 * For every trace event defined, we will test each trace point separately,
 * and then by groups, and finally all trace points.
 */
static __init void event_trace_self_tests(void)
{
	struct ftrace_subsystem_dir *dir;
	struct ftrace_event_file *file;
	struct ftrace_event_call *call;
	struct event_subsystem *system;
	struct trace_array *tr;
	int ret;

	tr = top_trace_array();
	if (!tr)
		return;

	pr_info("Running tests on trace events:\n");

	list_for_each_entry(file, &tr->events, list) {

		call = file->event_call;

		/* Only test those that have a probe */
		if (!call->class || !call->class->probe)
			continue;

/*
 * Testing syscall events here is pretty useless, but
 * we still do it if configured. It is time consuming, though.
 * What we really need is a user thread to perform the
 * syscalls as we test.
 */
#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
		if (call->class->system &&
		    strcmp(call->class->system, "syscalls") == 0)
			continue;
#endif

		pr_info("Testing event %s: ", ftrace_event_name(call));

		/*
		 * If an event is already enabled, someone is using
		 * it and the self test should not be on.
		 */
		if (file->flags & FTRACE_EVENT_FL_ENABLED) {
			pr_warn("Enabled event during self test!\n");
			WARN_ON_ONCE(1);
			continue;
		}

		ftrace_event_enable_disable(file, 1);
		event_test_stuff();
		ftrace_event_enable_disable(file, 0);

		pr_cont("OK\n");
	}

	/* Now test at the sub system level */

	pr_info("Running tests on trace event systems:\n");

	list_for_each_entry(dir, &tr->systems, list) {

		system = dir->subsystem;

		/* the ftrace system is special, skip it */
		if (strcmp(system->name, "ftrace") == 0)
			continue;

		pr_info("Testing event system %s: ", system->name);

		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
		if (WARN_ON_ONCE(ret)) {
			pr_warn("error enabling system %s\n",
				system->name);
			continue;
		}

		event_test_stuff();

		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
		if (WARN_ON_ONCE(ret)) {
			pr_warn("error disabling system %s\n",
				system->name);
			continue;
		}

		pr_cont("OK\n");
	}

	/* Test with all events enabled */

	pr_info("Running tests on all trace events:\n");
	pr_info("Testing all events: ");

	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error enabling all events\n");
		return;
	}

	event_test_stuff();

	/* reset sysname */
	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error disabling all events\n");
		return;
	}

	pr_cont("OK\n");
}

#ifdef CONFIG_FUNCTION_TRACER

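/*
 * When the event tests are re-run with the function tracer enabled,
 * this callback records a TRACE_FN entry for every traced function.
 * The per-cpu counter below guards against recursion on each CPU.
 */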
static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);

static void
function_test_events_call(unsigned long ip, unsigned long parent_ip,
			  struct ftrace_ops *op, struct pt_regs *pt_regs)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct ftrace_entry *entry;
	unsigned long flags;
	long disabled;
	int cpu;
	int pc;

	pc = preempt_count();
	preempt_disable_notrace();
	cpu = raw_smp_processor_id();
	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));

	if (disabled != 1)
		goto out;

	local_save_flags(flags);

	event = trace_current_buffer_lock_reserve(&buffer,
						  TRACE_FN, sizeof(*entry),
						  flags, pc);
	if (!event)
		goto out;
	entry	= ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;

	trace_buffer_unlock_commit(buffer, event, flags, pc);

 out:
	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
	preempt_enable_notrace();
}

static struct ftrace_ops trace_ops __initdata  =
{
	.func = function_test_events_call,
	.flags = FTRACE_OPS_FL_RECURSION_SAFE,
};

static __init void event_trace_self_test_with_function(void)
{
	int ret;
	ret = register_ftrace_function(&trace_ops);
	if (WARN_ON(ret < 0)) {
		pr_info("Failed to enable function tracer for event tests\n");
		return;
	}
	pr_info("Running tests again, along with the function tracer\n");
	event_trace_self_tests();
	unregister_ftrace_function(&trace_ops);
}
#else
static __init void event_trace_self_test_with_function(void)
{
}
#endif

static __init int event_trace_self_tests_init(void)
{
	if (!tracing_selftest_disabled) {
		event_trace_self_tests();
		event_trace_self_test_with_function();
	}

	return 0;
}

late_initcall(event_trace_self_tests_init);

#endif