/*
 * fs/kernfs/dir.c - kernfs directory implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 */

#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>

#include "kernfs-internal.h"

DEFINE_MUTEX(kernfs_mutex);
static DEFINE_SPINLOCK(kernfs_rename_lock);	/* kn->parent and ->name */
static char kernfs_pr_cont_buf[PATH_MAX];	/* protected by rename_lock */

#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)

static bool kernfs_active(struct kernfs_node *kn)
{
	lockdep_assert_held(&kernfs_mutex);
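	/* deactivation adds KN_DEACTIVATED_BIAS, driving ->active negative */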
	return atomic_read(&kn->active) >= 0;
}

static bool kernfs_lockdep(struct kernfs_node *kn)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	return kn->flags & KERNFS_LOCKDEP;
#else
	return false;
#endif
}

static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
{
	return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
}

static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf,
					      size_t buflen)
{
	char *p = buf + buflen;
	int len;

	*--p = '\0';

	do {
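		/* prepend "/<name>" for each ancestor, stopping before the root node */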
		len = strlen(kn->name);
		if (p - buf < len + 1) {
			buf[0] = '\0';
			p = NULL;
			break;
		}
		p -= len;
		memcpy(p, kn->name, len);
		*--p = '/';
		kn = kn->parent;
	} while (kn && kn->parent);

	return p;
}

/**
 * kernfs_name - obtain the name of a given node
 * @kn: kernfs_node of interest
 * @buf: buffer to copy @kn's name into
 * @buflen: size of @buf
 *
 * Copies the name of @kn into @buf of @buflen bytes.  The behavior is
 * similar to strlcpy().  It returns the length of @kn's name and if @buf
 * isn't long enough, it's filled up to @buflen-1 and nul terminated.
 *
 * This function can be called from any context.
 */
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&kernfs_rename_lock, flags);
	ret = kernfs_name_locked(kn, buf, buflen);
	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
	return ret;
}

/**
 * kernfs_path - build full path of a given node
 * @kn: kernfs_node of interest
 * @buf: buffer to copy @kn's name into
 * @buflen: size of @buf
 *
 * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
 * path is built from the end of @buf so the returned pointer usually
 * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
 * and %NULL is returned.
 */
char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
{
	unsigned long flags;
	char *p;

	spin_lock_irqsave(&kernfs_rename_lock, flags);
	p = kernfs_path_locked(kn, buf, buflen);
	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
	return p;
}
EXPORT_SYMBOL_GPL(kernfs_path);

/**
 * pr_cont_kernfs_name - pr_cont name of a kernfs_node
 * @kn: kernfs_node of interest
 *
 * This function can be called from any context.
 */
void pr_cont_kernfs_name(struct kernfs_node *kn)
{
	unsigned long flags;

	spin_lock_irqsave(&kernfs_rename_lock, flags);

	kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
	pr_cont("%s", kernfs_pr_cont_buf);

	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
}

/**
 * pr_cont_kernfs_path - pr_cont path of a kernfs_node
 * @kn: kernfs_node of interest
 *
 * This function can be called from any context.
 */
void pr_cont_kernfs_path(struct kernfs_node *kn)
{
	unsigned long flags;
	char *p;

	spin_lock_irqsave(&kernfs_rename_lock, flags);

	p = kernfs_path_locked(kn, kernfs_pr_cont_buf,
			       sizeof(kernfs_pr_cont_buf));
	if (p)
		pr_cont("%s", p);
	else
		pr_cont("<name too long>");

	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
}

/**
 * kernfs_get_parent - determine the parent node and pin it
 * @kn: kernfs_node of interest
 *
 * Determines @kn's parent, pins and returns it.  This function can be
 * called from any context.
 */
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
{
	struct kernfs_node *parent;
	unsigned long flags;

	spin_lock_irqsave(&kernfs_rename_lock, flags);
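	/* ->parent only changes under kernfs_rename_lock, so it's stable here */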
	parent = kn->parent;
	kernfs_get(parent);
	spin_unlock_irqrestore(&kernfs_rename_lock, flags);

	return parent;
}

/**
 *	kernfs_name_hash
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t )
 */
static unsigned int kernfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);
	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
	hash &= 0x7fffffffU;
	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}

static int kernfs_name_compare(unsigned int hash, const char *name,
			       const void *ns, const struct kernfs_node *kn)
{
	if (hash < kn->hash)
		return -1;
	if (hash > kn->hash)
		return 1;
	if (ns < kn->ns)
		return -1;
	if (ns > kn->ns)
		return 1;
	return strcmp(name, kn->name);
}

static int kernfs_sd_compare(const struct kernfs_node *left,
			     const struct kernfs_node *right)
{
	return kernfs_name_compare(left->hash, left->name, left->ns, right);
}

/**
 *	kernfs_link_sibling - link kernfs_node into sibling rbtree
 *	@kn: kernfs_node of interest
 *
 *	Link @kn into its sibling rbtree which starts from
 *	@kn->parent->dir.children.
 *
 *	Locking:
 *	mutex_lock(kernfs_mutex)
 *
 *	RETURNS:
 *	0 on success, -EEXIST on failure.
 */
static int kernfs_link_sibling(struct kernfs_node *kn)
{
	struct rb_node **node = &kn->parent->dir.children.rb_node;
	struct rb_node *parent = NULL;

	while (*node) {
		struct kernfs_node *pos;
		int result;

		pos = rb_to_kn(*node);
		parent = *node;
		result = kernfs_sd_compare(kn, pos);
		if (result < 0)
			node = &pos->rb.rb_left;
		else if (result > 0)
			node = &pos->rb.rb_right;
		else
			return -EEXIST;
	}

	/* add new node and rebalance the tree */
	rb_link_node(&kn->rb, parent, node);
	rb_insert_color(&kn->rb, &kn->parent->dir.children);

	/* successfully added, account subdir number */
	if (kernfs_type(kn) == KERNFS_DIR)
		kn->parent->dir.subdirs++;

	return 0;
}

/**
 *	kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
 *	@kn: kernfs_node of interest
 *
 *	Try to unlink @kn from its sibling rbtree which starts from
 *	kn->parent->dir.children.  Returns %true if @kn was actually
 *	removed, %false if @kn wasn't on the rbtree.
 *
 *	Locking:
 *	mutex_lock(kernfs_mutex)
 */
static bool kernfs_unlink_sibling(struct kernfs_node *kn)
{
	if (RB_EMPTY_NODE(&kn->rb))
		return false;

	if (kernfs_type(kn) == KERNFS_DIR)
		kn->parent->dir.subdirs--;

	rb_erase(&kn->rb, &kn->parent->dir.children);
	RB_CLEAR_NODE(&kn->rb);
	return true;
}

/**
 *	kernfs_get_active - get an active reference to kernfs_node
 *	@kn: kernfs_node to get an active reference to
 *
 *	Get an active reference of @kn.  This function is a noop if @kn
 *	is NULL.
 *
 *	RETURNS:
 *	Pointer to @kn on success, NULL on failure.
 */
struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
{
	if (unlikely(!kn))
		return NULL;

	if (!atomic_inc_unless_negative(&kn->active))
		return NULL;

	if (kernfs_lockdep(kn))
		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
	return kn;
}

/**
 *	kernfs_put_active - put an active reference to kernfs_node
 *	@kn: kernfs_node to put an active reference to
 *
 *	Put an active reference to @kn.  This function is a noop if @kn
 *	is NULL.
 */
void kernfs_put_active(struct kernfs_node *kn)
{
	struct kernfs_root *root = kernfs_root(kn);
	int v;

	if (unlikely(!kn))
		return;

	if (kernfs_lockdep(kn))
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
	v = atomic_dec_return(&kn->active);
	if (likely(v != KN_DEACTIVATED_BIAS))
		return;

	wake_up_all(&root->deactivate_waitq);
}

/**
 * kernfs_drain - drain kernfs_node
 * @kn: kernfs_node to drain
 *
 * Drain existing usages and nuke all existing mmaps of @kn.  Multiple
 * removers may invoke this function concurrently on @kn and all will
 * return after draining is complete.
 */
static void kernfs_drain(struct kernfs_node *kn)
	__releases(&kernfs_mutex) __acquires(&kernfs_mutex)
{
	struct kernfs_root *root = kernfs_root(kn);

	lockdep_assert_held(&kernfs_mutex);
	WARN_ON_ONCE(kernfs_active(kn));

	mutex_unlock(&kernfs_mutex);

	if (kernfs_lockdep(kn)) {
		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
		if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
			lock_contended(&kn->dep_map, _RET_IP_);
	}

	/* but everyone should wait for draining */
	wait_event(root->deactivate_waitq,
		   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);

	if (kernfs_lockdep(kn)) {
		lock_acquired(&kn->dep_map, _RET_IP_);
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
	}

	kernfs_unmap_bin_file(kn);

	mutex_lock(&kernfs_mutex);
}

/**
 * kernfs_get - get a reference count on a kernfs_node
 * @kn: the target kernfs_node
 */
void kernfs_get(struct kernfs_node *kn)
{
	if (kn) {
		WARN_ON(!atomic_read(&kn->count));
		atomic_inc(&kn->count);
	}
}
EXPORT_SYMBOL_GPL(kernfs_get);

/**
 * kernfs_put - put a reference count on a kernfs_node
 * @kn: the target kernfs_node
 *
 * Put a reference count of @kn and destroy it if it reached zero.
 */
void kernfs_put(struct kernfs_node *kn)
{
	struct kernfs_node *parent;
	struct kernfs_root *root;

	if (!kn || !atomic_dec_and_test(&kn->count))
		return;
	root = kernfs_root(kn);
 repeat:
	/*
	 * Moving/renaming is always done while holding reference.
	 * kn->parent won't change beneath us.
	 */
	parent = kn->parent;

	WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
		  "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
		  parent ? parent->name : "", kn->name, atomic_read(&kn->active));

	if (kernfs_type(kn) == KERNFS_LINK)
		kernfs_put(kn->symlink.target_kn);

	kfree_const(kn->name);

	if (kn->iattr) {
		if (kn->iattr->ia_secdata)
			security_release_secctx(kn->iattr->ia_secdata,
						kn->iattr->ia_secdata_len);
		simple_xattrs_free(&kn->iattr->xattrs);
	}
	kfree(kn->iattr);
	ida_simple_remove(&root->ino_ida, kn->ino);
	kmem_cache_free(kernfs_node_cache, kn);

	kn = parent;
	if (kn) {
		if (atomic_dec_and_test(&kn->count))
			goto repeat;
	} else {
		/* just released the root kn, free @root too */
		ida_destroy(&root->ino_ida);
		kfree(root);
	}
}
EXPORT_SYMBOL_GPL(kernfs_put);

static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct kernfs_node *kn;

	if (flags & LOOKUP_RCU)
		return -ECHILD;

	/* Always perform fresh lookup for negatives */
	if (!dentry->d_inode)
		goto out_bad_unlocked;

	kn = dentry->d_fsdata;
	mutex_lock(&kernfs_mutex);

	/* The kernfs node has been deactivated */
	if (!kernfs_active(kn))
		goto out_bad;

	/* The kernfs node has been moved? */
	if (dentry->d_parent->d_fsdata != kn->parent)
		goto out_bad;

	/* The kernfs node has been renamed */
	if (strcmp(dentry->d_name.name, kn->name) != 0)
		goto out_bad;

	/* The kernfs node has been moved to a different namespace */
	if (kn->parent && kernfs_ns_enabled(kn->parent) &&
	    kernfs_info(dentry->d_sb)->ns != kn->ns)
		goto out_bad;

	mutex_unlock(&kernfs_mutex);
	return 1;
out_bad:
	mutex_unlock(&kernfs_mutex);
out_bad_unlocked:
	return 0;
}

static void kernfs_dop_release(struct dentry *dentry)
{
	kernfs_put(dentry->d_fsdata);
}

const struct dentry_operations kernfs_dops = {
	.d_revalidate	= kernfs_dop_revalidate,
	.d_release	= kernfs_dop_release,
};

/**
 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
 * @dentry: the dentry in question
 *
 * Return the kernfs_node associated with @dentry.  If @dentry is not a
 * kernfs one, %NULL is returned.
 *
 * While the returned kernfs_node will stay accessible as long as @dentry
 * is accessible, the returned node can be in any state and the caller is
 * fully responsible for determining what's accessible.
 */
struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
{
	if (dentry->d_sb->s_op == &kernfs_sops)
		return dentry->d_fsdata;
	return NULL;
}

static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
					     const char *name, umode_t mode,
					     unsigned flags)
{
	struct kernfs_node *kn;
	int ret;

	name = kstrdup_const(name, GFP_KERNEL);
	if (!name)
		return NULL;

	kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
	if (!kn)
		goto err_out1;

	/*
	 * If the ino of the sysfs entry created for a kmem cache gets
	 * allocated from an ida layer, which is accounted to the memcg that
	 * owns the cache, the memcg will get pinned forever. So do not account
	 * ino ida allocations.
	 */
	ret = ida_simple_get(&root->ino_ida, 1, 0,
			     GFP_KERNEL | __GFP_NOACCOUNT);
	if (ret < 0)
		goto err_out2;
	kn->ino = ret;

	atomic_set(&kn->count, 1);
	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
	RB_CLEAR_NODE(&kn->rb);

	kn->name = name;
	kn->mode = mode;
	kn->flags = flags;

	return kn;

 err_out2:
	kmem_cache_free(kernfs_node_cache, kn);
 err_out1:
	kfree_const(name);
	return NULL;
}

struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
				    const char *name, umode_t mode,
				    unsigned flags)
{
	struct kernfs_node *kn;

	kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
	if (kn) {
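		/* the parent ref taken here is dropped by kernfs_put() when @kn dies */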
		kernfs_get(parent);
		kn->parent = parent;
	}
	return kn;
}

/**
 *	kernfs_add_one - add kernfs_node to parent without warning
 *	@kn: kernfs_node to be added
 *
 *	The caller must already have initialized @kn->parent.  This
 *	function increments nlink of the parent's inode if @kn is a
 *	directory and links @kn into the children list of the parent.
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
 */
int kernfs_add_one(struct kernfs_node *kn)
{
	struct kernfs_node *parent = kn->parent;
	struct kernfs_iattrs *ps_iattr;
	bool has_ns;
	int ret;

	mutex_lock(&kernfs_mutex);

	ret = -EINVAL;
	has_ns = kernfs_ns_enabled(parent);
	if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
		 has_ns ? "required" : "invalid", parent->name, kn->name))
		goto out_unlock;

	if (kernfs_type(parent) != KERNFS_DIR)
		goto out_unlock;

	ret = -ENOENT;
	if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
		goto out_unlock;

	kn->hash = kernfs_name_hash(kn->name, kn->ns);

	ret = kernfs_link_sibling(kn);
	if (ret)
		goto out_unlock;

	/* Update timestamps on the parent */
	ps_iattr = parent->iattr;
	if (ps_iattr) {
		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
	}

	mutex_unlock(&kernfs_mutex);

	/*
	 * Activate the new node unless CREATE_DEACTIVATED is requested.
	 * If not activated here, the kernfs user is responsible for
	 * activating the node with kernfs_activate().  A node which hasn't
	 * been activated is not visible to userland and its removal won't
	 * trigger deactivation.
	 */
	if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
		kernfs_activate(kn);
	return 0;

out_unlock:
	mutex_unlock(&kernfs_mutex);
	return ret;
}

/**
 * kernfs_find_ns - find kernfs_node with the given name
 * @parent: kernfs_node to search under
 * @name: name to look for
 * @ns: the namespace tag to use
 *
 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 * the found kernfs_node on success, %NULL on failure.
 */
static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
					  const unsigned char *name,
					  const void *ns)
{
	struct rb_node *node = parent->dir.children.rb_node;
	bool has_ns = kernfs_ns_enabled(parent);
	unsigned int hash;

	lockdep_assert_held(&kernfs_mutex);

	if (has_ns != (bool)ns) {
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
		     has_ns ? "required" : "invalid", parent->name, name);
		return NULL;
	}

	hash = kernfs_name_hash(name, ns);
	while (node) {
		struct kernfs_node *kn;
		int result;

		kn = rb_to_kn(node);
		result = kernfs_name_compare(hash, name, ns, kn);
		if (result < 0)
			node = node->rb_left;
		else if (result > 0)
			node = node->rb_right;
		else
			return kn;
	}
	return NULL;
}

/**
 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 * @parent: kernfs_node to search under
 * @name: name to look for
 * @ns: the namespace tag to use
 *
 * Look for kernfs_node with name @name under @parent and get a reference
 * if found.  This function may sleep and returns pointer to the found
 * kernfs_node on success, %NULL on failure.
 */
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
					   const char *name, const void *ns)
{
	struct kernfs_node *kn;

	mutex_lock(&kernfs_mutex);
	kn = kernfs_find_ns(parent, name, ns);
	kernfs_get(kn);
	mutex_unlock(&kernfs_mutex);

	return kn;
}
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);

/**
 * kernfs_create_root - create a new kernfs hierarchy
 * @scops: optional syscall operations for the hierarchy
 * @flags: KERNFS_ROOT_* flags
 * @priv: opaque data associated with the new directory
 *
 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 * failure.
 */
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
				       unsigned int flags, void *priv)
{
	struct kernfs_root *root;
	struct kernfs_node *kn;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

	ida_init(&root->ino_ida);
	INIT_LIST_HEAD(&root->supers);

	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
			       KERNFS_DIR);
	if (!kn) {
		ida_destroy(&root->ino_ida);
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}

	kn->priv = priv;
	kn->dir.root = root;

	root->syscall_ops = scops;
	root->flags = flags;
	root->kn = kn;
	init_waitqueue_head(&root->deactivate_waitq);

	if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
		kernfs_activate(kn);

	return root;
}

/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
	kernfs_remove(root->kn);	/* will also free @root */
}

/**
 * kernfs_create_dir_ns - create a directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
 * @mode: mode of the new directory
 * @priv: opaque data associated with the new directory
 * @ns: optional namespace tag of the directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
 */
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
					 const char *name, umode_t mode,
					 void *priv, const void *ns)
{
	struct kernfs_node *kn;
	int rc;

	/* allocate */
	kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->dir.root = parent->dir.root;
	kn->ns = ns;
	kn->priv = priv;

	/* link in */
	rc = kernfs_add_one(kn);
	if (!rc)
		return kn;

	kernfs_put(kn);
	return ERR_PTR(rc);
}

static struct dentry *kernfs_iop_lookup(struct inode *dir,
					struct dentry *dentry,
					unsigned int flags)
{
	struct dentry *ret;
	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
	struct kernfs_node *kn;
	struct inode *inode;
	const void *ns = NULL;

	mutex_lock(&kernfs_mutex);

	if (kernfs_ns_enabled(parent))
		ns = kernfs_info(dir->i_sb)->ns;

	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);

	/* no such entry */
	if (!kn || !kernfs_active(kn)) {
		ret = NULL;
		goto out_unlock;
	}
	kernfs_get(kn);
	dentry->d_fsdata = kn;

	/* attach dentry and inode */
	inode = kernfs_get_inode(dir->i_sb, kn);
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	/* instantiate and hash dentry */
	ret = d_splice_alias(inode, dentry);
 out_unlock:
	mutex_unlock(&kernfs_mutex);
	return ret;
}

static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
			    umode_t mode)
{
	struct kernfs_node *parent = dir->i_private;
	struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
	int ret;

	if (!scops || !scops->mkdir)
		return -EPERM;

	if (!kernfs_get_active(parent))
		return -ENODEV;

	ret = scops->mkdir(parent, dentry->d_name.name, mode);

	kernfs_put_active(parent);
	return ret;
}

static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct kernfs_node *kn  = dentry->d_fsdata;
	struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
	int ret;

	if (!scops || !scops->rmdir)
		return -EPERM;

	if (!kernfs_get_active(kn))
		return -ENODEV;

	ret = scops->rmdir(kn);

	kernfs_put_active(kn);
	return ret;
}

static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
			     struct inode *new_dir, struct dentry *new_dentry)
{
	struct kernfs_node *kn  = old_dentry->d_fsdata;
	struct kernfs_node *new_parent = new_dir->i_private;
	struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
	int ret;

	if (!scops || !scops->rename)
		return -EPERM;

	if (!kernfs_get_active(kn))
		return -ENODEV;

	if (!kernfs_get_active(new_parent)) {
		kernfs_put_active(kn);
		return -ENODEV;
	}

	ret = scops->rename(kn, new_parent, new_dentry->d_name.name);

	kernfs_put_active(new_parent);
	kernfs_put_active(kn);
	return ret;
}

const struct inode_operations kernfs_dir_iops = {
	.lookup		= kernfs_iop_lookup,
	.permission	= kernfs_iop_permission,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.setxattr	= kernfs_iop_setxattr,
	.removexattr	= kernfs_iop_removexattr,
	.getxattr	= kernfs_iop_getxattr,
	.listxattr	= kernfs_iop_listxattr,

	.mkdir		= kernfs_iop_mkdir,
	.rmdir		= kernfs_iop_rmdir,
	.rename		= kernfs_iop_rename,
};

static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
{
	struct kernfs_node *last;

	while (true) {
		struct rb_node *rbn;

		last = pos;

		if (kernfs_type(pos) != KERNFS_DIR)
			break;

		rbn = rb_first(&pos->dir.children);
		if (!rbn)
			break;

		pos = rb_to_kn(rbn);
	}

	return last;
}

/**
 * kernfs_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: kernfs_node whose descendants to walk
 *
 * Find the next descendant to visit for post-order traversal of @root's
 * descendants.  @root is included in the iteration and the last node to be
 * visited.
 */
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
						       struct kernfs_node *root)
{
	struct rb_node *rbn;

	lockdep_assert_held(&kernfs_mutex);

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
		return kernfs_leftmost_descendant(root);

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
	rbn = rb_next(&pos->rb);
	if (rbn)
		return kernfs_leftmost_descendant(rb_to_kn(rbn));

	/* no sibling left, visit parent */
	return pos->parent;
}

/**
 * kernfs_activate - activate a node which started deactivated
 * @kn: kernfs_node whose subtree is to be activated
 *
 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
 * needs to be explicitly activated.  A node which hasn't been activated
 * isn't visible to userland and deactivation is skipped during its
 * removal.  This is useful to construct atomic init sequences where
 * creation of multiple nodes should either succeed or fail atomically.
 *
 * The caller is responsible for ensuring that this function is not called
 * after kernfs_remove*() is invoked on @kn.
 */
void kernfs_activate(struct kernfs_node *kn)
{
	struct kernfs_node *pos;

	mutex_lock(&kernfs_mutex);

	pos = NULL;
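	/* post-order walk: each descendant is visited before @kn, which comes last */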
	while ((pos = kernfs_next_descendant_post(pos, kn))) {
		if (!pos || (pos->flags & KERNFS_ACTIVATED))
			continue;

		WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
		WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);

		atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
		pos->flags |= KERNFS_ACTIVATED;
	}

	mutex_unlock(&kernfs_mutex);
}

static void __kernfs_remove(struct kernfs_node *kn)
{
	struct kernfs_node *pos;

	lockdep_assert_held(&kernfs_mutex);

	/*
	 * Short-circuit if non-root @kn has already finished removal.
	 * This is for kernfs_remove_self() which plays with active ref
	 * after removal.
	 */
	if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
		return;

	pr_debug("kernfs %s: removing\n", kn->name);

	/* prevent any new usage under @kn by deactivating all nodes */
	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn)))
		if (kernfs_active(pos))
			atomic_add(KN_DEACTIVATED_BIAS, &pos->active);

	/* deactivate and unlink the subtree node-by-node */
	do {
		pos = kernfs_leftmost_descendant(kn);

		/*
		 * kernfs_drain() drops kernfs_mutex temporarily and @pos's
		 * base ref could have been put by someone else by the time
		 * the function returns.  Make sure it doesn't go away
		 * underneath us.
		 */
		kernfs_get(pos);

		/*
		 * Drain iff @kn was activated.  This avoids draining and
		 * its lockdep annotations for nodes which have never been
		 * activated and allows embedding kernfs_remove() in create
		 * error paths without worrying about draining.
		 */
		if (kn->flags & KERNFS_ACTIVATED)
			kernfs_drain(pos);
		else
			WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);

		/*
		 * kernfs_unlink_sibling() succeeds once per node.  Use it
		 * to decide who's responsible for cleanups.
		 */
		if (!pos->parent || kernfs_unlink_sibling(pos)) {
			struct kernfs_iattrs *ps_iattr =
				pos->parent ? pos->parent->iattr : NULL;

			/* update timestamps on the parent */
			if (ps_iattr) {
				ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
				ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
			}

			kernfs_put(pos);
		}

		kernfs_put(pos);
	} while (pos != kn);
}

/**
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
 *
 * Remove @kn along with all its subdirectories and files.
 */
void kernfs_remove(struct kernfs_node *kn)
{
	mutex_lock(&kernfs_mutex);
	__kernfs_remove(kn);
	mutex_unlock(&kernfs_mutex);
}

/**
 * kernfs_break_active_protection - break out of active protection
 * @kn: the self kernfs_node
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops.  Each invocation of
 * this function must also be matched with an invocation of
 * kernfs_unbreak_active_protection().
 *
 * This function releases the active reference of @kn the caller is
 * holding.  Once this function is called, @kn may be removed at any point
 * and the caller is solely responsible for ensuring that the objects it
 * dereferences are accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Take ourself out of the active ref dependency chain.  If
	 * we're called without an active ref, lockdep will complain.
	 */
	kernfs_put_active(kn);
}

/**
 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
 * @kn: the self kernfs_node
 *
 * If kernfs_break_active_protection() was called, this function must be
 * invoked before finishing the kernfs operation.  Note that while this
 * function restores the active reference, it doesn't and can't actually
 * restore the active protection - @kn may already be removed or be in
 * the process of being removed.  Once kernfs_break_active_protection()
 * is invoked, that
 * protection is irreversibly gone for the kernfs operation instance.
 *
 * While this function may be called at any point after
 * kernfs_break_active_protection() is invoked, its most useful location
 * would be right before the enclosing kernfs operation returns.
 */
void kernfs_unbreak_active_protection(struct kernfs_node *kn)
{
	/*
	 * @kn->active could be in any state; however, the increment we do
	 * here will be undone as soon as the enclosing kernfs operation
	 * finishes and this temporary bump can't break anything.  If @kn
	 * is alive, nothing changes.  If @kn is being deactivated, the
	 * soon-to-follow put will either finish deactivation or restore
	 * deactivated state.  If @kn is already removed, the temporary
	 * bump is guaranteed to be gone before @kn is released.
	 */
	atomic_inc(&kn->active);
	if (kernfs_lockdep(kn))
		rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
}

/**
 * kernfs_remove_self - remove a kernfs_node from its own method
 * @kn: the self kernfs_node to remove
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops.  This can be used to
 * implement a file operation which deletes itself.
 *
 * For example, the "delete" file for a sysfs device directory can be
 * implemented by invoking kernfs_remove_self() on the "delete" file
 * itself.  This function breaks the circular dependency of trying to
 * deactivate self while holding an active ref itself.  It isn't necessary
 * to modify the usual removal path to use kernfs_remove_self().  The
 * "delete" implementation can simply invoke kernfs_remove_self() on self
 * before proceeding with the usual removal path.  kernfs will ignore later
 * kernfs_remove() on self.
 *
 * kernfs_remove_self() can be called multiple times concurrently on the
 * same kernfs_node.  Only the first one actually performs removal and
 * returns %true.  All others will wait until the kernfs operation which
 * won self-removal finishes and return %false.  Note that the losers wait
 * for the completion of not only the winning kernfs_remove_self() but also
 * the whole kernfs_ops which won the arbitration.  This can be used to
 * guarantee, for example, all concurrent writes to a "delete" file to
 * finish only after the whole operation is complete.
 */
bool kernfs_remove_self(struct kernfs_node *kn)
{
	bool ret;

	mutex_lock(&kernfs_mutex);
	kernfs_break_active_protection(kn);

	/*
	 * SUICIDAL is used to arbitrate among competing invocations.  Only
	 * the first one will actually perform removal.  When the removal
	 * is complete, SUICIDED is set and the active ref is restored
	 * while holding kernfs_mutex.  The ones which lost arbitration
	 * wait for SUICIDED && drained which can happen only after the
	 * enclosing kernfs operation which executed the winning instance
	 * of kernfs_remove_self() finished.
	 */
	if (!(kn->flags & KERNFS_SUICIDAL)) {
		kn->flags |= KERNFS_SUICIDAL;
		__kernfs_remove(kn);
		kn->flags |= KERNFS_SUICIDED;
		ret = true;
	} else {
		wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
		DEFINE_WAIT(wait);

		while (true) {
			prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
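			/*
			 * Woken via root->deactivate_waitq once the winner has
			 * set KERNFS_SUICIDED and the active ref has drained.
			 */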

			if ((kn->flags & KERNFS_SUICIDED) &&
			    atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
				break;

			mutex_unlock(&kernfs_mutex);
			schedule();
			mutex_lock(&kernfs_mutex);
		}
		finish_wait(waitq, &wait);
		WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
		ret = false;
	}

	/*
	 * This must be done while holding kernfs_mutex; otherwise, waiting
	 * for SUICIDED && deactivated could finish prematurely.
	 */
	kernfs_unbreak_active_protection(kn);

	mutex_unlock(&kernfs_mutex);
	return ret;
}

/**
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
 *
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
 */
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
			     const void *ns)
{
	struct kernfs_node *kn;

	if (!parent) {
		WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
			name);
		return -ENOENT;
	}

	mutex_lock(&kernfs_mutex);

	kn = kernfs_find_ns(parent, name, ns);
	if (kn)
		__kernfs_remove(kn);

	mutex_unlock(&kernfs_mutex);

	if (kn)
		return 0;
	else
		return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
 * @kn: target node
 * @new_parent: new parent to put @kn under
 * @new_name: new name
 * @new_ns: new namespace tag
 */
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
		     const char *new_name, const void *new_ns)
{
	struct kernfs_node *old_parent;
	const char *old_name = NULL;
	int error;

	/* can't move or rename root */
	if (!kn->parent)
		return -EINVAL;

	mutex_lock(&kernfs_mutex);

	error = -ENOENT;
	if (!kernfs_active(kn) || !kernfs_active(new_parent))
		goto out;

	error = 0;
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
		goto out;	/* nothing to rename */

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
		goto out;

	/* rename kernfs_node */
	if (strcmp(kn->name, new_name) != 0) {
		error = -ENOMEM;
		new_name = kstrdup_const(new_name, GFP_KERNEL);
		if (!new_name)
			goto out;
	} else {
		new_name = NULL;
	}

	/*
	 * Move to the appropriate place in the appropriate directories rbtree.
	 */
	kernfs_unlink_sibling(kn);
	kernfs_get(new_parent);

	/* rename_lock protects ->parent and ->name accessors */
	spin_lock_irq(&kernfs_rename_lock);

	old_parent = kn->parent;
	kn->parent = new_parent;

	kn->ns = new_ns;
	if (new_name) {
		old_name = kn->name;
		kn->name = new_name;
	}

	spin_unlock_irq(&kernfs_rename_lock);

	kn->hash = kernfs_name_hash(kn->name, kn->ns);
	kernfs_link_sibling(kn);

	kernfs_put(old_parent);
	kfree_const(old_name);

	error = 0;
 out:
	mutex_unlock(&kernfs_mutex);
	return error;
}

/* Relationship between s_mode and the DT_xxx types */
static inline unsigned char dt_type(struct kernfs_node *kn)
{
	return (kn->mode >> 12) & 15;
}

static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
{
	kernfs_put(filp->private_data);
	return 0;
}

static struct kernfs_node *kernfs_dir_pos(const void *ns,
	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
	if (pos) {
		int valid = kernfs_active(pos) &&
			pos->parent == parent && hash == pos->hash;
		kernfs_put(pos);
		if (!valid)
			pos = NULL;
	}
	if (!pos && (hash > 1) && (hash < INT_MAX)) {
		struct rb_node *node = parent->dir.children.rb_node;
		while (node) {
			pos = rb_to_kn(node);

			if (hash < pos->hash)
				node = node->rb_left;
			else if (hash > pos->hash)
				node = node->rb_right;
			else
				break;
		}
	}
	/* Skip over entries which are dying/dead or in the wrong namespace */
	while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
		struct rb_node *node = rb_next(&pos->rb);
		if (!node)
			pos = NULL;
		else
			pos = rb_to_kn(node);
	}
	return pos;
}

static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
	struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
	pos = kernfs_dir_pos(ns, parent, ino, pos);
	if (pos) {
		do {
			struct rb_node *node = rb_next(&pos->rb);
			if (!node)
				pos = NULL;
			else
				pos = rb_to_kn(node);
		} while (pos && (!kernfs_active(pos) || pos->ns != ns));
	}
	return pos;
}

static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
	mutex_lock(&kernfs_mutex);

	if (kernfs_ns_enabled(parent))
		ns = kernfs_info(dentry->d_sb)->ns;

	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
	     pos;
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
		const char *name = pos->name;
		unsigned int type = dt_type(pos);
		int len = strlen(name);
		ino_t ino = pos->ino;

		ctx->pos = pos->hash;
		file->private_data = pos;
		kernfs_get(pos);

		mutex_unlock(&kernfs_mutex);
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
		mutex_lock(&kernfs_mutex);
	}
	mutex_unlock(&kernfs_mutex);
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}

static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
				    int whence)
{
	struct inode *inode = file_inode(file);
	loff_t ret;

	mutex_lock(&inode->i_mutex);
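	/*
	 * A directory's f_pos is the hash cursor used by kernfs_fop_readdir,
	 * which the VFS calls with i_mutex held; updating the position under
	 * the same lock keeps the two serialized.
	 */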
	ret = generic_file_llseek(file, offset, whence);
	mutex_unlock(&inode->i_mutex);

	return ret;
}

const struct file_operations kernfs_dir_fops = {
	.read		= generic_read_dir,
	.iterate	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= kernfs_dir_fop_llseek,
};