// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/nfs/dir.c
 *
 *  Copyright (C) 1992  Rick Sladkey
 *
 *  nfs directory handling functions
 *
 * 10 Apr 1996	Added silly rename for unlink	--okir
 * 28 Sep 1996	Improved directory cache --okir
 * 23 Aug 1997  Claus Heine claus@momo.math.rwth-aachen.de 
 *              Re-implemented silly rename for unlink, newly implemented
 *              silly rename for nfs_rename() following the suggestions
 *              of Olaf Kirch (okir) found in this file.
 *              Following Linus comments on my original hack, this version
 *              depends only on the dcache stuff and doesn't touch the inode
 *              layer (iput() and friends).
 *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
 */

21
#include <linux/compat.h>
22
#include <linux/module.h>
Linus Torvalds's avatar
Linus Torvalds committed
23 24 25 26 27 28 29 30 31 32 33 34
#include <linux/time.h>
#include <linux/errno.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/pagemap.h>
35
#include <linux/pagevec.h>
Linus Torvalds's avatar
Linus Torvalds committed
36
#include <linux/namei.h>
37
#include <linux/mount.h>
38
#include <linux/swap.h>
Alexey Dobriyan's avatar
Alexey Dobriyan committed
39
#include <linux/sched.h>
40
#include <linux/kmemleak.h>
41
#include <linux/xattr.h>
42
#include <linux/hash.h>
Linus Torvalds's avatar
Linus Torvalds committed
43 44

#include "delegation.h"
45
#include "iostat.h"
46
#include "internal.h"
47
#include "fscache.h"
Linus Torvalds's avatar
Linus Torvalds committed
48

49 50
#include "nfstrace.h"

Linus Torvalds's avatar
Linus Torvalds committed
51 52 53
/* #define NFS_DEBUG_VERBOSE 1 */

static int nfs_opendir(struct inode *, struct file *);
54
static int nfs_closedir(struct inode *, struct file *);
Al Viro's avatar
Al Viro committed
55
static int nfs_readdir(struct file *, struct dir_context *);
56
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
57
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
58
static void nfs_readdir_free_folio(struct folio *);
Linus Torvalds's avatar
Linus Torvalds committed
59

60
const struct file_operations nfs_dir_operations = {
61
	.llseek		= nfs_llseek_dir,
Linus Torvalds's avatar
Linus Torvalds committed
62
	.read		= generic_read_dir,
63
	.iterate_shared	= nfs_readdir,
Linus Torvalds's avatar
Linus Torvalds committed
64
	.open		= nfs_opendir,
65
	.release	= nfs_closedir,
Linus Torvalds's avatar
Linus Torvalds committed
66 67 68
	.fsync		= nfs_fsync_dir,
};

69
const struct address_space_operations nfs_dir_aops = {
70
	.free_folio = nfs_readdir_free_folio,
Bryan Schumaker's avatar
Bryan Schumaker committed
71 72
};

73 74
#define NFS_INIT_DTSIZE PAGE_SIZE

75 76
static struct nfs_open_dir_context *
alloc_nfs_open_dir_context(struct inode *dir)
77
{
78
	struct nfs_inode *nfsi = NFS_I(dir);
79
	struct nfs_open_dir_context *ctx;
80 81

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
82
	if (ctx != NULL) {
83
		ctx->attr_gencount = nfsi->attr_gencount;
84
		ctx->dtsize = NFS_INIT_DTSIZE;
85
		spin_lock(&dir->i_lock);
86 87
		if (list_empty(&nfsi->open_files) &&
		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
88 89 90
			nfs_set_cache_invalid(dir,
					      NFS_INO_INVALID_DATA |
						      NFS_INO_REVAL_FORCED);
91
		list_add_tail_rcu(&ctx->list, &nfsi->open_files);
92
		memcpy(ctx->verf, nfsi->cookieverf, sizeof(ctx->verf));
93
		spin_unlock(&dir->i_lock);
94 95 96
		return ctx;
	}
	return  ERR_PTR(-ENOMEM);
97 98
}

99
static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
100
{
101
	spin_lock(&dir->i_lock);
102
	list_del_rcu(&ctx->list);
103
	spin_unlock(&dir->i_lock);
104
	kfree_rcu(ctx, rcu_head);
105 106
}

Linus Torvalds's avatar
Linus Torvalds committed
107 108 109 110 111 112
/*
 * Open file
 */
static int
nfs_opendir(struct inode *inode, struct file *filp)
{
113 114
	int res = 0;
	struct nfs_open_dir_context *ctx;
Linus Torvalds's avatar
Linus Torvalds committed
115

116
	dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
117 118

	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
Chuck Lever's avatar
Chuck Lever committed
119

Trond Myklebust's avatar
Trond Myklebust committed
120
	ctx = alloc_nfs_open_dir_context(inode);
121 122 123 124 125 126
	if (IS_ERR(ctx)) {
		res = PTR_ERR(ctx);
		goto out;
	}
	filp->private_data = ctx;
out:
Linus Torvalds's avatar
Linus Torvalds committed
127 128 129
	return res;
}

130 131 132
static int
nfs_closedir(struct inode *inode, struct file *filp)
{
Al Viro's avatar
Al Viro committed
133
	put_nfs_open_dir_context(file_inode(filp), filp->private_data);
134 135 136
	return 0;
}

Bryan Schumaker's avatar
Bryan Schumaker committed
137 138 139
/* One cached directory entry stored in a readdir cache page. */
struct nfs_cache_array_entry {
	/* Cookie that precedes this entry (array->last_cookie at insert time) */
	u64 cookie;
	/* Inode number reported for the entry */
	u64 ino;
	/* Heap copy of the entry name; freed by nfs_readdir_clear_array() */
	const char *name;
	/* Length of @name in bytes */
	unsigned int name_len;
	/* Directory entry type as decoded from the reply */
	unsigned char d_type;
};

struct nfs_cache_array {
146
	u64 change_attr;
Bryan Schumaker's avatar
Bryan Schumaker committed
147
	u64 last_cookie;
148 149
	unsigned int size;
	unsigned char page_full : 1,
150 151
		      page_is_eof : 1,
		      cookies_are_ordered : 1;
152
	struct nfs_cache_array_entry array[];
Bryan Schumaker's avatar
Bryan Schumaker committed
153 154
};

155
struct nfs_readdir_descriptor {
Linus Torvalds's avatar
Linus Torvalds committed
156 157
	struct file	*file;
	struct page	*page;
Al Viro's avatar
Al Viro committed
158
	struct dir_context *ctx;
159
	pgoff_t		page_index;
160
	pgoff_t		page_index_max;
161
	u64		dir_cookie;
162
	u64		last_cookie;
163
	loff_t		current_index;
Bryan Schumaker's avatar
Bryan Schumaker committed
164

165
	__be32		verf[NFS_DIR_VERIFIER_SIZE];
166
	unsigned long	dir_verifier;
167
	unsigned long	timestamp;
168
	unsigned long	gencount;
169
	unsigned long	attr_gencount;
Bryan Schumaker's avatar
Bryan Schumaker committed
170
	unsigned int	cache_entry_index;
171 172
	unsigned int	buffer_fills;
	unsigned int	dtsize;
173
	bool clear_cache;
174
	bool plus;
175
	bool eob;
176
	bool eof;
177
};
Linus Torvalds's avatar
Linus Torvalds committed
178

179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
/*
 * Store @sz as the descriptor's READDIR buffer size after capping it at
 * the server's dtsize and then raising it to NFS_MIN_FILE_IO_SIZE.
 * The floor is applied last, so it wins over a smaller server limit.
 */
static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
{
	unsigned int limit = NFS_SERVER(file_inode(desc->file))->dtsize;
	unsigned int capped = sz > limit ? limit : sz;

	desc->dtsize = capped < NFS_MIN_FILE_IO_SIZE ? NFS_MIN_FILE_IO_SIZE :
						       capped;
}

/* Halve the READDIR buffer size, subject to the nfs_set_dtsize() limits. */
static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
{
	unsigned int halved = desc->dtsize / 2;

	nfs_set_dtsize(desc, halved);
}

/* Double the READDIR buffer size, subject to the nfs_set_dtsize() limits. */
static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
{
	unsigned int doubled = desc->dtsize * 2;

	nfs_set_dtsize(desc, doubled);
}

201 202
/*
 * Reset the cache array header in @page to "empty": no entries, not full,
 * not EOF, cookies assumed ordered. Existing entries are NOT freed here;
 * use nfs_readdir_page_reinit_array() when the page may hold names.
 */
static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie,
					u64 change_attr)
{
	struct nfs_cache_array *hdr = kmap_atomic(page);

	hdr->size = 0;
	hdr->page_full = 0;
	hdr->page_is_eof = 0;
	hdr->cookies_are_ordered = 1;
	hdr->last_cookie = last_cookie;
	hdr->change_attr = change_attr;

	kunmap_atomic(hdr);
}

Bryan Schumaker's avatar
Bryan Schumaker committed
216 217 218
/*
 * we are freeing strings created by nfs_add_to_readdir_array()
 */
219
static void nfs_readdir_clear_array(struct page *page)
Bryan Schumaker's avatar
Bryan Schumaker committed
220
{
221
	struct nfs_cache_array *array;
222
	unsigned int i;
223

224
	array = kmap_atomic(page);
225
	for (i = 0; i < array->size; i++)
226
		kfree(array->array[i].name);
227
	array->size = 0;
228
	kunmap_atomic(array);
Bryan Schumaker's avatar
Bryan Schumaker committed
229 230
}

231 232 233 234 235
/* ->free_folio hook: release the names cached in the folio's page. */
static void nfs_readdir_free_folio(struct folio *folio)
{
	struct page *page = &folio->page;

	nfs_readdir_clear_array(page);
}

236 237 238 239 240 241 242
/*
 * Discard the current contents of the cache array in @page (freeing the
 * stored names) and start it afresh for @last_cookie / @change_attr.
 */
static void nfs_readdir_page_reinit_array(struct page *page, u64 last_cookie,
					  u64 change_attr)
{
	/* Names must be released before the header's size is reset */
	nfs_readdir_clear_array(page);

	nfs_readdir_page_init_array(page, last_cookie, change_attr);
}

243 244 245 246 247
/*
 * Allocate a standalone page and initialise it as an empty cache array
 * starting at @last_cookie, with a change attribute of 0.
 *
 * Returns the page, or NULL if the allocation fails.
 */
static struct page *
nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags)
{
	struct page *page = alloc_page(gfp_flags);

	if (!page)
		return NULL;

	nfs_readdir_page_init_array(page, last_cookie, 0);
	return page;
}

/*
 * Free the cached names in @page and drop a reference to it.
 * A NULL @page is ignored.
 */
static void nfs_readdir_page_array_free(struct page *page)
{
	if (!page)
		return;

	nfs_readdir_clear_array(page);
	put_page(page);
}

260 261 262 263 264
/*
 * Return the cookie that identifies the start of @array: the cookie of
 * its first entry, or last_cookie while the array is still empty.
 */
static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array)
{
	if (array->size == 0)
		return array->last_cookie;

	return array->array[0].cookie;
}

265 266 267 268 269 270 271 272 273 274 275
/* Mark @array as ending the directory; an EOF array is also treated as full. */
static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
{
	array->page_full = 1;
	array->page_is_eof = 1;
}

/* Report whether @array has been flagged as full (this includes EOF). */
static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
{
	return array->page_full != 0;
}

Bryan Schumaker's avatar
Bryan Schumaker committed
276 277 278 279 280
/*
 * the caller is responsible for freeing qstr.name
 * when called by nfs_readdir_add_to_array, the strings will be freed in
 * nfs_clear_readdir_array()
 */
281
static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
Bryan Schumaker's avatar
Bryan Schumaker committed
282
{
283 284
	const char *ret = kmemdup_nul(name, len, GFP_KERNEL);

285 286 287 288
	/*
	 * Avoid a kmemleak false positive. The pointer to the name is stored
	 * in a page cache page which kmemleak does not scan.
	 */
289 290 291
	if (ret != NULL)
		kmemleak_not_leak(ret);
	return ret;
Bryan Schumaker's avatar
Bryan Schumaker committed
292 293
}

294 295 296 297 298 299
/* Number of entries that fit in one page after the cache array header. */
static size_t nfs_readdir_array_maxentries(void)
{
	const size_t payload = PAGE_SIZE - sizeof(struct nfs_cache_array);

	return payload / sizeof(struct nfs_cache_array_entry);
}

300 301 302 303 304 305 306
/*
 * Check that one more entry would still lie entirely within the page.
 *
 * Returns 0 if there is room, or -ENOSPC if the array is full. Reaching
 * the per-page entry limit latches page_full as a side effect.
 */
static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
{
	if (!array->page_full &&
	    array->size == nfs_readdir_array_maxentries())
		array->page_full = 1;

	return array->page_full ? -ENOSPC : 0;
}

314 315 316
/*
 * Append @entry to the cache array stored in @page.
 *
 * On return, *@cookie holds the array's last_cookie, whether or not the
 * entry was added.
 *
 * Returns 0 on success, -ENOMEM if the name could not be copied, or
 * -ENOSPC if the array has no room left.
 */
static int nfs_readdir_page_array_append(struct page *page,
					 const struct nfs_entry *entry,
					 u64 *cookie)
{
	struct nfs_cache_array *array;
	struct nfs_cache_array_entry *slot;
	const char *name_copy;
	int ret = -ENOMEM;

	/* Copy the name first: the GFP_KERNEL allocation precedes the atomic map */
	name_copy = nfs_readdir_copy_name(entry->name, entry->len);

	array = kmap_atomic(page);
	if (name_copy) {
		ret = nfs_readdir_array_can_expand(array);
		if (ret) {
			kfree(name_copy);
		} else {
			slot = &array->array[array->size];
			/* An entry is keyed by the cookie that precedes it */
			slot->cookie = array->last_cookie;
			slot->ino = entry->ino;
			slot->d_type = entry->d_type;
			slot->name_len = entry->len;
			slot->name = name_copy;

			array->last_cookie = entry->cookie;
			/* A non-increasing cookie disables the range shortcut */
			if (array->last_cookie <= slot->cookie)
				array->cookies_are_ordered = 0;
			array->size++;
			if (entry->eof != 0)
				nfs_readdir_array_set_eof(array);
		}
	}

	*cookie = array->last_cookie;
	kunmap_atomic(array);
	return ret;
}

352 353 354 355 356 357
/* 18-bit mask: U32_MAX >> 14 == 0x3ffff */
#define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14)
/*
 * Hash algorithm allowing content addressable access to sequences
 * of directory cookies. Content is addressed by the value of the
 * cookie index of the first readdir entry in a page.
 *
 * Only 18 bits of hash are used, to avoid excessive memory use by the
 * page cache XArray. 18 bits allow the caching of 262144 pages of
 * sequences of readdir entries. Since each page holds 127 readdir
 * entries on a typical 64-bit system, that works out to a cache of
 * ~ 33 million entries per directory.
 *
 * Cookie 0 (the start of the directory) always maps to page index 0.
 */
static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
{
	return cookie ? hash_64(cookie, 18) : 0;
}

371 372 373 374 375 376 377 378
/*
 * Check that the cache array in @page was filled under @change_attr and
 * that it starts at @last_cookie.
 */
static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
				      u64 change_attr)
{
	struct nfs_cache_array *array = kmap_atomic(page);
	bool valid;

	valid = array->change_attr == change_attr &&
		nfs_readdir_array_index_cookie(array) == last_cookie;

	kunmap_atomic(array);
	return valid;
}

/* Unlock @page and then drop the caller's reference to it. */
static void nfs_readdir_page_unlock_and_put(struct page *page)
{
	unlock_page(page);

	put_page(page);
}

391 392 393 394 395 396 397 398 399 400 401 402
/*
 * Make @page a usable cache array for @cookie / @change_attr.
 *
 * An up-to-date page that still validates is left untouched. A stale
 * up-to-date page has its names freed first. In every other case the
 * array is initialised empty and the page is marked up to date.
 */
static void nfs_readdir_page_init_and_validate(struct page *page, u64 cookie,
					       u64 change_attr)
{
	bool uptodate = PageUptodate(page);

	if (uptodate && nfs_readdir_page_validate(page, cookie, change_attr))
		return;
	if (uptodate)
		nfs_readdir_clear_array(page);

	nfs_readdir_page_init_array(page, cookie, change_attr);
	SetPageUptodate(page);
}

403
/*
 * Look up (or create) the cache page for @cookie via grab_cache_page()
 * and make sure it holds a valid array for @cookie / @change_attr.
 *
 * Returns the page, or NULL if no page could be obtained.
 */
static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
						u64 cookie, u64 change_attr)
{
	struct page *page;

	page = grab_cache_page(mapping, nfs_readdir_page_cookie_hash(cookie));
	if (page)
		nfs_readdir_page_init_and_validate(page, cookie, change_attr);
	return page;
}

/* Read the last_cookie field of the cache array stored in @page. */
static u64 nfs_readdir_page_last_cookie(struct page *page)
{
	struct nfs_cache_array *array = kmap_atomic(page);
	u64 cookie = array->last_cookie;

	kunmap_atomic(array);
	return cookie;
}

/* Return true while the cache array in @page has not been marked full. */
static bool nfs_readdir_page_needs_filling(struct page *page)
{
	struct nfs_cache_array *array = kmap_atomic(page);
	bool full = nfs_readdir_array_is_full(array);

	kunmap_atomic(array);
	return !full;
}

438 439 440 441 442 443 444 445 446
/* Map @page and flag its cache array as the end of the directory. */
static void nfs_readdir_page_set_eof(struct page *page)
{
	struct nfs_cache_array *array = kmap_atomic(page);

	nfs_readdir_array_set_eof(array);
	kunmap_atomic(array);
}

447
/*
 * Obtain the cache page that continues a fill at @cookie, using
 * grab_cache_page_nowait() for the lookup.
 *
 * The returned page always holds an array whose last_cookie equals
 * @cookie; an array that does not is reinitialised.
 *
 * Returns the page, or NULL if none could be obtained.
 */
static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
					      u64 cookie, u64 change_attr)
{
	struct page *next;

	next = grab_cache_page_nowait(mapping,
				      nfs_readdir_page_cookie_hash(cookie));
	if (!next)
		return NULL;

	nfs_readdir_page_init_and_validate(next, cookie, change_attr);
	if (nfs_readdir_page_last_cookie(next) != cookie)
		nfs_readdir_page_reinit_array(next, cookie, change_attr);

	return next;
}

462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
/*
 * Non-zero when the caller uses a 32-bit API: decided per syscall with
 * CONFIG_COMPAT, otherwise by the native word size.
 */
static inline
int is_32bit_api(void)
{
#ifndef CONFIG_COMPAT
	return (BITS_PER_LONG == 32);
#else
	return in_compat_syscall();
#endif
}

/*
 * Decide whether the raw directory cookie may be used as the file
 * position for @filp.
 *
 * FMODE_32BITHASH forbids it and FMODE_64BITHASH allows it; with neither
 * flag set, the answer depends on whether the caller uses a 32-bit API.
 */
static
bool nfs_readdir_use_cookie(const struct file *filp)
{
	if (filp->f_mode & FMODE_32BITHASH)
		return false;
	if (filp->f_mode & FMODE_64BITHASH)
		return true;
	return !is_32bit_api();
}

481 482 483 484 485 486 487 488 489
/*
 * Point @desc at the array to search next.
 *
 * If @array is full, advance past it: continue from its last_cookie, add
 * its entry count to current_index and move to the next page index.
 * Otherwise stay on the same array by reusing its starting cookie.
 */
static void nfs_readdir_seek_next_array(struct nfs_cache_array *array,
					struct nfs_readdir_descriptor *desc)
{
	if (!array->page_full) {
		desc->last_cookie = nfs_readdir_array_index_cookie(array);
		return;
	}

	desc->last_cookie = array->last_cookie;
	desc->current_index += array->size;
	desc->cache_entry_index = 0;
	desc->page_index++;
}

493 494 495 496 497 498 499
/* Restart the cache search from the beginning of the directory. */
static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc)
{
	desc->page_index = 0;
	desc->last_cookie = 0;
	desc->current_index = 0;
}

500 501
static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
				      struct nfs_readdir_descriptor *desc)
Bryan Schumaker's avatar
Bryan Schumaker committed
502
{
Al Viro's avatar
Al Viro committed
503
	loff_t diff = desc->ctx->pos - desc->current_index;
Bryan Schumaker's avatar
Bryan Schumaker committed
504 505 506 507 508
	unsigned int index;

	if (diff < 0)
		goto out_eof;
	if (diff >= array->size) {
509
		if (array->page_is_eof)
Bryan Schumaker's avatar
Bryan Schumaker committed
510
			goto out_eof;
511
		nfs_readdir_seek_next_array(array, desc);
Bryan Schumaker's avatar
Bryan Schumaker committed
512 513 514 515
		return -EAGAIN;
	}

	index = (unsigned int)diff;
516
	desc->dir_cookie = array->array[index].cookie;
Bryan Schumaker's avatar
Bryan Schumaker committed
517 518 519
	desc->cache_entry_index = index;
	return 0;
out_eof:
520
	desc->eof = true;
Bryan Schumaker's avatar
Bryan Schumaker committed
521 522 523
	return -EBADCOOKIE;
}

524 525 526 527 528 529 530 531 532 533 534 535 536
/*
 * Cheap pre-check before scanning @array for @cookie.
 *
 * Arrays with unordered cookies must always be scanned. For ordered
 * (monotonically increasing) cookies, @cookie can only be present if it
 * lies in [first entry cookie, last_cookie).
 */
static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
					      u64 cookie)
{
	if (!array->cookies_are_ordered)
		return true;
	if (cookie >= array->last_cookie)
		return false;
	return array->size == 0 || cookie >= array->array[0].cookie;
}

537 538
static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
					 struct nfs_readdir_descriptor *desc)
Bryan Schumaker's avatar
Bryan Schumaker committed
539
{
540
	unsigned int i;
Bryan Schumaker's avatar
Bryan Schumaker committed
541 542
	int status = -EAGAIN;

543 544 545
	if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
		goto check_eof;

Bryan Schumaker's avatar
Bryan Schumaker committed
546
	for (i = 0; i < array->size; i++) {
547
		if (array->array[i].cookie == desc->dir_cookie) {
548
			if (nfs_readdir_use_cookie(desc->file))
549
				desc->ctx->pos = desc->dir_cookie;
550
			else
551
				desc->ctx->pos = desc->current_index + i;
Bryan Schumaker's avatar
Bryan Schumaker committed
552
			desc->cache_entry_index = i;
Trond Myklebust's avatar
Trond Myklebust committed
553
			return 0;
Bryan Schumaker's avatar
Bryan Schumaker committed
554 555
		}
	}
556
check_eof:
557
	if (array->page_is_eof) {
558
		status = -EBADCOOKIE;
559
		if (desc->dir_cookie == array->last_cookie)
560
			desc->eof = true;
561 562
	} else
		nfs_readdir_seek_next_array(array, desc);
Bryan Schumaker's avatar
Bryan Schumaker committed
563 564 565
	return status;
}

566
static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
Bryan Schumaker's avatar
Bryan Schumaker committed
567 568
{
	struct nfs_cache_array *array;
Trond Myklebust's avatar
Trond Myklebust committed
569
	int status;
Bryan Schumaker's avatar
Bryan Schumaker committed
570

571
	array = kmap_atomic(desc->page);
Bryan Schumaker's avatar
Bryan Schumaker committed
572

573
	if (desc->dir_cookie == 0)
Bryan Schumaker's avatar
Bryan Schumaker committed
574 575 576 577
		status = nfs_readdir_search_for_pos(array, desc);
	else
		status = nfs_readdir_search_for_cookie(array, desc);

578
	kunmap_atomic(array);
Bryan Schumaker's avatar
Bryan Schumaker committed
579 580 581 582
	return status;
}

/* Fill a page with xdr information before transferring to the cache page */
Trond Myklebust's avatar
Trond Myklebust committed
583
static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
584 585 586
				  __be32 *verf, u64 cookie,
				  struct page **pages, size_t bufsize,
				  __be32 *verf_res)
Linus Torvalds's avatar
Linus Torvalds committed
587
{
588 589 590 591
	struct inode *inode = file_inode(desc->file);
	struct nfs_readdir_arg arg = {
		.dentry = file_dentry(desc->file),
		.cred = desc->file->f_cred,
592
		.verf = verf,
593 594 595 596 597 598 599 600
		.cookie = cookie,
		.pages = pages,
		.page_len = bufsize,
		.plus = desc->plus,
	};
	struct nfs_readdir_res res = {
		.verf = verf_res,
	};
601
	unsigned long	timestamp, gencount;
Linus Torvalds's avatar
Linus Torvalds committed
602 603 604 605
	int		error;

 again:
	timestamp = jiffies;
606
	gencount = nfs_inc_attr_generation_counter();
607
	desc->dir_verifier = nfs_save_change_attribute(inode);
608
	error = NFS_PROTO(inode)->readdir(&arg, &res);
Linus Torvalds's avatar
Linus Torvalds committed
609 610 611 612
	if (error < 0) {
		/* We requested READDIRPLUS, but the server doesn't grok it */
		if (error == -ENOTSUPP && desc->plus) {
			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
613
			desc->plus = arg.plus = false;
Linus Torvalds's avatar
Linus Torvalds committed
614 615 616 617
			goto again;
		}
		goto error;
	}
618
	desc->timestamp = timestamp;
619
	desc->gencount = gencount;
Bryan Schumaker's avatar
Bryan Schumaker committed
620 621
error:
	return error;
Linus Torvalds's avatar
Linus Torvalds committed
622 623
}

624
static int xdr_decode(struct nfs_readdir_descriptor *desc,
625
		      struct nfs_entry *entry, struct xdr_stream *xdr)
Linus Torvalds's avatar
Linus Torvalds committed
626
{
627
	struct inode *inode = file_inode(desc->file);
628
	int error;
Linus Torvalds's avatar
Linus Torvalds committed
629

630
	error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus);
631 632
	if (error)
		return error;
Bryan Schumaker's avatar
Bryan Schumaker committed
633 634 635
	entry->fattr->time_start = desc->timestamp;
	entry->fattr->gencount = desc->gencount;
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
636 637
}

638 639 640
/* Match file and dirent using either filehandle or fileid
 * Note: caller is responsible for checking the fsid
 */
Bryan Schumaker's avatar
Bryan Schumaker committed
641 642 643
static
int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
{
644
	struct inode *inode;
645 646
	struct nfs_inode *nfsi;

647 648
	if (d_really_is_negative(dentry))
		return 0;
649

650 651 652 653 654
	inode = d_inode(dentry);
	if (is_bad_inode(inode) || NFS_STALE(inode))
		return 0;

	nfsi = NFS_I(inode);
655 656 657 658 659
	if (entry->fattr->fileid != nfsi->fileid)
		return 0;
	if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
		return 0;
	return 1;
Bryan Schumaker's avatar
Bryan Schumaker committed
660 661
}

662 663 664 665 666
#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)

static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
				unsigned int cache_hits,
				unsigned int cache_misses)
667 668 669
{
	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
		return false;
670 671
	if (ctx->pos == 0 ||
	    cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD)
672 673 674 675 676
		return true;
	return false;
}

/*
677
 * This function is called by the getattr code to request the
678
 * use of readdirplus to accelerate any future lookups in the same
679 680
 * directory.
 */
681
void nfs_readdir_record_entry_cache_hit(struct inode *dir)
682
{
683
	struct nfs_inode *nfsi = NFS_I(dir);
684
	struct nfs_open_dir_context *ctx;
685 686

	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
687 688 689 690 691 692
	    S_ISDIR(dir->i_mode)) {
		rcu_read_lock();
		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
			atomic_inc(&ctx->cache_hits);
		rcu_read_unlock();
	}
693 694
}

695 696 697 698 699
/*
 * This function is mainly for use by nfs_getattr().
 *
 * If this is an 'ls -l', we want to force use of readdirplus.
 */
700
void nfs_readdir_record_entry_cache_miss(struct inode *dir)
701
{
702
	struct nfs_inode *nfsi = NFS_I(dir);
703
	struct nfs_open_dir_context *ctx;
704 705

	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
706 707 708 709 710
	    S_ISDIR(dir->i_mode)) {
		rcu_read_lock();
		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
			atomic_inc(&ctx->cache_misses);
		rcu_read_unlock();
711 712 713
	}
}

714 715
static void nfs_lookup_advise_force_readdirplus(struct inode *dir,
						unsigned int flags)
716
{
717 718
	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
		return;
719 720
	if (flags & (LOOKUP_EXCL | LOOKUP_PARENT | LOOKUP_REVAL))
		return;
721 722 723
	nfs_readdir_record_entry_cache_miss(dir);
}

Bryan Schumaker's avatar
Bryan Schumaker committed
724
static
725 726
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
		unsigned long dir_verifier)
Bryan Schumaker's avatar
Bryan Schumaker committed
727
{
728
	struct qstr filename = QSTR_INIT(entry->name, entry->len);
Al Viro's avatar
Al Viro committed
729
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
730 731
	struct dentry *dentry;
	struct dentry *alias;
Bryan Schumaker's avatar
Bryan Schumaker committed
732
	struct inode *inode;
733
	int status;
Bryan Schumaker's avatar
Bryan Schumaker committed
734

735 736
	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
		return;
737 738
	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
		return;
739 740 741 742 743 744 745 746
	if (filename.len == 0)
		return;
	/* Validate that the name doesn't contain any illegal '\0' */
	if (strnlen(filename.name, filename.len) != filename.len)
		return;
	/* ...or '/' */
	if (strnchr(filename.name, filename.len, '/'))
		return;
747 748 749 750 751 752
	if (filename.name[0] == '.') {
		if (filename.len == 1)
			return;
		if (filename.len == 2 && filename.name[1] == '.')
			return;
	}
753
	filename.hash = full_name_hash(parent, filename.name, filename.len);
Bryan Schumaker's avatar
Bryan Schumaker committed
754

755
	dentry = d_lookup(parent, &filename);
Al Viro's avatar
Al Viro committed
756 757 758 759 760 761 762
again:
	if (!dentry) {
		dentry = d_alloc_parallel(parent, &filename, &wq);
		if (IS_ERR(dentry))
			return;
	}
	if (!d_in_lookup(dentry)) {
763 764 765 766
		/* Is there a mountpoint here? If so, just exit */
		if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
					&entry->fattr->fsid))
			goto out;
Bryan Schumaker's avatar
Bryan Schumaker committed
767
		if (nfs_same_file(dentry, entry)) {
768 769
			if (!entry->fh->size)
				goto out;
770
			nfs_set_verifier(dentry, dir_verifier);
771
			status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
772
			if (!status)
773
				nfs_setsecurity(d_inode(dentry), entry->fattr);
774 775
			trace_nfs_readdir_lookup_revalidate(d_inode(parent),
							    dentry, 0, status);
Bryan Schumaker's avatar
Bryan Schumaker committed
776 777
			goto out;
		} else {
778 779
			trace_nfs_readdir_lookup_revalidate_failed(
				d_inode(parent), dentry, 0);
780
			d_invalidate(dentry);
Bryan Schumaker's avatar
Bryan Schumaker committed
781
			dput(dentry);
Al Viro's avatar
Al Viro committed
782 783
			dentry = NULL;
			goto again;
Bryan Schumaker's avatar
Bryan Schumaker committed
784 785
		}
	}
786 787 788 789
	if (!entry->fh->size) {
		d_lookup_done(dentry);
		goto out;
	}
Bryan Schumaker's avatar
Bryan Schumaker committed
790

791
	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
792
	alias = d_splice_alias(inode, dentry);
Al Viro's avatar
Al Viro committed
793 794 795 796 797 798 799
	d_lookup_done(dentry);
	if (alias) {
		if (IS_ERR(alias))
			goto out;
		dput(dentry);
		dentry = alias;
	}
800
	nfs_set_verifier(dentry, dir_verifier);
801
	trace_nfs_readdir_lookup(d_inode(parent), dentry, 0);
Bryan Schumaker's avatar
Bryan Schumaker committed
802 803 804 805
out:
	dput(dentry);
}

806 807 808 809 810 811 812 813 814 815 816 817 818 819 820
/*
 * Decode one directory entry from @stream and, when READDIRPLUS is in
 * use, prime the dcache with it.
 *
 * The label length is reset to NFS4_MAXLABELLEN before each decode when
 * the entry has a label buffer. Returns 0 or the decode error.
 */
static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc,
				    struct nfs_entry *entry,
				    struct xdr_stream *stream)
{
	int ret;

	if (entry->fattr->label)
		entry->fattr->label->len = NFS4_MAXLABELLEN;

	ret = xdr_decode(desc, entry, stream);
	if (ret == 0 && desc->plus)
		nfs_prime_dcache(file_dentry(desc->file), entry,
				 desc->dir_verifier);
	return ret;
}

Bryan Schumaker's avatar
Bryan Schumaker committed
821
/* Perform conversion from xdr to cache array */
822 823
static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
				   struct nfs_entry *entry,
824 825 826
				   struct page **xdr_pages, unsigned int buflen,
				   struct page **arrays, size_t narrays,
				   u64 change_attr)
Linus Torvalds's avatar
Linus Torvalds committed
827
{
828
	struct address_space *mapping = desc->file->f_mapping;
829
	struct xdr_stream stream;
830
	struct xdr_buf buf;
831
	struct page *scratch, *new, *page = *arrays;
832
	u64 cookie;
833
	int status;
834

835 836 837
	scratch = alloc_page(GFP_KERNEL);
	if (scratch == NULL)
		return -ENOMEM;
838

839
	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
840
	xdr_set_scratch_page(&stream, scratch);
841 842

	do {
843
		status = nfs_readdir_entry_decode(desc, entry, &stream);
844
		if (status != 0)
845
			break;
846

847
		status = nfs_readdir_page_array_append(page, entry, &cookie);
848 849 850
		if (status != -ENOSPC)
			continue;

851 852 853
		if (page->mapping != mapping) {
			if (!--narrays)
				break;
854
			new = nfs_readdir_page_array_alloc(cookie, GFP_KERNEL);
855 856 857 858 859
			if (!new)
				break;
			arrays++;
			*arrays = page = new;
		} else {
860 861
			new = nfs_readdir_page_get_next(mapping, cookie,
							change_attr);
862 863 864 865 866 867
			if (!new)
				break;
			if (page != *arrays)
				nfs_readdir_page_unlock_and_put(page);
			page = new;
		}
868
		desc->page_index_max++;
869
		status = nfs_readdir_page_array_append(page, entry, &cookie);
870
	} while (!status && !entry->eof);
871

872 873
	switch (status) {
	case -EBADCOOKIE:
874 875 876 877
		if (!entry->eof)
			break;
		nfs_readdir_page_set_eof(page);
		fallthrough;
878
	case -EAGAIN:
879
		status = 0;
880
		break;
881 882 883 884 885 886
	case -ENOSPC:
		status = 0;
		if (!desc->plus)
			break;
		while (!nfs_readdir_entry_decode(desc, entry, &stream))
			;
Linus Torvalds's avatar
Linus Torvalds committed
887
	}
888

889
	if (page != *arrays)
890 891
		nfs_readdir_page_unlock_and_put(page);

892
	put_page(scratch);
893
	return status;
894 895
}

896
/*
 * Drop a reference to each of the first @npages pages in @pages, from
 * the last to the first, then free the pointer array itself.
 */
static void nfs_readdir_free_pages(struct page **pages, size_t npages)
{
	size_t remaining;

	for (remaining = npages; remaining > 0; remaining--)
		put_page(pages[remaining - 1]);
	kfree(pages);
}

/*
904 905
 * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
 * to nfs_readdir_free_pages()
906
 */
907
static struct page **nfs_readdir_alloc_pages(size_t npages)
908
{
909 910
	struct page **pages;
	size_t i;
911

912 913 914
	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;
915 916 917 918 919 920
	for (i = 0; i < npages; i++) {
		struct page *page = alloc_page(GFP_KERNEL);
		if (page == NULL)
			goto out_freepages;
		pages[i] = page;
	}
921
	return pages;
922 923

out_freepages:
924
	nfs_readdir_free_pages(pages, i);
925
	return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
926 927
}

928
/*
 * Read one buffer of directory data from the server and convert it into
 * the cache arrays in @arrays.
 *
 * The read continues from the last_cookie of the first array. An empty
 * reply marks that array as EOF. desc->buffer_fills is bumped whenever
 * the READDIR call itself succeeds.
 *
 * Returns 0 on success or a negative errno (-ENOMEM on allocation
 * failure, otherwise the READDIR or conversion error).
 */
static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
				    __be32 *verf_arg, __be32 *verf_res,
				    struct page **arrays, size_t narrays)
{
	struct inode *inode = file_inode(desc->file);
	struct nfs_server *server = NFS_SERVER(inode);
	unsigned int dtsize = desc->dtsize;
	struct page *page = *arrays;
	struct nfs_entry *entry;
	struct page **pages;
	size_t array_size;
	unsigned int pglen;
	u64 change_attr;
	int status = -ENOMEM;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->cookie = nfs_readdir_page_last_cookie(page);
	entry->fh = nfs_alloc_fhandle();
	entry->fattr = nfs_alloc_fattr_with_label(server);
	entry->server = server;
	if (entry->fh == NULL || entry->fattr == NULL)
		goto out;

	/* Number of pages needed to hold dtsize bytes of XDR data */
	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
	pages = nfs_readdir_alloc_pages(array_size);
	if (!pages)
		goto out;

	/* Sample the change attribute before issuing the call */
	change_attr = inode_peek_iversion_raw(inode);
	status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
					dtsize, verf_res);
	if (status >= 0) {
		pglen = status;
		if (pglen == 0)
			nfs_readdir_page_set_eof(page);
		else
			status = nfs_readdir_page_filler(desc, entry, pages,
							 pglen, arrays, narrays,
							 change_attr);
		desc->buffer_fills++;
	}

	nfs_readdir_free_pages(pages, array_size);
out:
	nfs_free_fattr(entry->fattr);
	nfs_free_fhandle(entry->fh);
	kfree(entry);
	return status;
}

980
static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
Linus Torvalds's avatar
Linus Torvalds committed
981
{
982 983
	put_page(desc->page);
	desc->page = NULL;
Bryan Schumaker's avatar
Bryan Schumaker committed
984
}
Linus Torvalds's avatar
Linus Torvalds committed
985

986 987
static void
nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
Bryan Schumaker's avatar
Bryan Schumaker committed
988
{
989 990
	unlock_page(desc->page);
	nfs_readdir_page_put(desc);
Bryan Schumaker's avatar
Bryan Schumaker committed
991 992
}

993 994
static struct page *
nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
Bryan Schumaker's avatar
Bryan Schumaker committed
995
{
996 997
	struct address_space *mapping = desc->file->f_mapping;
	u64 change_attr = inode_peek_iversion_raw(mapping->host);
998 999
	u64 cookie = desc->last_cookie;
	struct page *page;
1000

1001 1002 1003 1004 1005 1006
	page = nfs_readdir_page_get_locked(mapping, cookie, change_attr);
	if (!page)
		return NULL;
	if (desc->clear_cache && !nfs_readdir_page_needs_filling(page))
		nfs_readdir_page_reinit_array(page, cookie, change_attr);
	return page;
Linus Torvalds's avatar
Linus Torvalds committed
1007 1008 1009
}

/*
Bryan Schumaker's avatar
Bryan Schumaker committed
1010
 * Returns 0 if desc->dir_cookie was found on page desc->page_index
1011
 * and locks the page to prevent removal from the page cache.
Linus Torvalds's avatar
Linus Torvalds committed
1012
 */
1013
static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
Bryan Schumaker's avatar
Bryan Schumaker committed
1014
{
1015 1016
	struct inode *inode = file_inode(desc->file);
	struct nfs_inode *nfsi = NFS_I(inode);
1017
	__be32 verf[NFS_DIR_VERIFIER_SIZE];
Bryan Schumaker's avatar
Bryan Schumaker committed
1018 1019
	int res;

1020 1021 1022 1023
	desc->page = nfs_readdir_page_get_cached(desc);
	if (!desc->page)
		return -ENOMEM;
	if (nfs_readdir_page_needs_filling(desc->page)) {
1024 1025 1026 1027
		/* Grow the dtsize if we had to go back for more pages */
		if (desc->page_index == desc->page_index_max)
			nfs_grow_dtsize(desc);
		desc->page_index_max = desc->page_index;
1028 1029 1030
		trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf,
					     desc->last_cookie,
					     desc->page->index, desc->dtsize);
1031 1032
		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
					       &desc->page, 1);
1033 1034
		if (res < 0) {
			nfs_readdir_page_unlock_and_put_cached(desc);
1035
			trace_nfs_readdir_cache_fill_done(inode, res);
1036 1037
			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
				invalidate_inode_pages2(desc->file->f_mapping);
1038
				nfs_readdir_rewind_search(desc);
1039 1040
				trace_nfs_readdir_invalidate_cache_range(
					inode, 0, MAX_LFS_FILESIZE);
1041 1042 1043
				return -EAGAIN;
			}
			return res;
1044
		}
1045 1046 1047
		/*
		 * Set the cookie verifier if the page cache was empty
		 */
1048 1049
		if (desc->last_cookie == 0 &&
		    memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) {
1050 1051
			memcpy(nfsi->cookieverf, verf,
			       sizeof(nfsi->cookieverf));
1052
			invalidate_inode_pages2_range(desc->file->f_mapping, 1,
1053
						      -1);
1054
			trace_nfs_readdir_invalidate_cache_range(
1055
				inode, 1, MAX_LFS_FILESIZE);
1056
		}
1057
		desc->clear_cache = false;
1058
	}
1059
	res = nfs_readdir_search_array(desc);
1060
	if (res == 0)
1061 1062
		return 0;
	nfs_readdir_page_unlock_and_put_cached(desc);
Bryan Schumaker's avatar
Bryan Schumaker committed
1063 1064 1065 1066
	return res;
}

/* Search for desc->dir_cookie from the beginning of the page cache */
1067
static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
Linus Torvalds's avatar
Linus Torvalds committed
1068
{
1069
	int res;
Bryan Schumaker's avatar
Bryan Schumaker committed
1070

Trond Myklebust's avatar
Trond Myklebust committed
1071
	do {
1072
		res = find_and_lock_cache_page(desc);
Trond Myklebust's avatar
Trond Myklebust committed
1073
	} while (res == -EAGAIN);
Linus Torvalds's avatar
Linus Torvalds committed
1074 1075 1076
	return res;
}

1077 1078
/*
 * Cache-miss count up to which nfs_readdir_handle_cache_misses() returns
 * false (unless a clear was already forced). nfs_do_filldir() also uses it
 * to cap how many entries it emits while desc->dir_cookie is still zero.
 */
#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)

Linus Torvalds's avatar
Linus Torvalds committed
1079 1080 1081
/*
 * Once we've found the start of the dirent within a page: fill 'er up...
 */
1082 1083
static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
			   const __be32 *verf)
Linus Torvalds's avatar
Linus Torvalds committed
1084 1085
{
	struct file	*file = desc->file;
1086
	struct nfs_cache_array *array;
1087
	unsigned int i;
1088
	bool first_emit = !desc->dir_cookie;
1089

1090
	array = kmap_local_page(desc->page);
Bryan Schumaker's avatar
Bryan Schumaker committed
1091
	for (i = desc->cache_entry_index; i < array->size; i++) {
1092
		struct nfs_cache_array_entry *ent;
Linus Torvalds's avatar
Linus Torvalds committed
1093

1094
		ent = &array->array[i];
1095
		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
Al Viro's avatar
Al Viro committed
1096
		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
1097
			desc->eob = true;
Linus Torvalds's avatar
Linus Torvalds committed
1098
			break;
1099
		}
1100
		memcpy(desc->verf, verf, sizeof(desc->verf));
1101
		if (i == array->size - 1) {
1102
			desc->dir_cookie = array->last_cookie;
1103 1104 1105 1106 1107
			nfs_readdir_seek_next_array(array, desc);
		} else {
			desc->dir_cookie = array->array[i + 1].cookie;
			desc->last_cookie = array->array[0].cookie;
		}
1108
		if (nfs_readdir_use_cookie(file))
1109
			desc->ctx->pos = desc->dir_cookie;
1110 1111
		else
			desc->ctx->pos++;
1112 1113 1114 1115
		if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 1) {
			desc->eob = true;
			break;
		}
Linus Torvalds's avatar
Linus Torvalds committed
1116
	}
1117
	if (array->page_is_eof)
1118
		desc->eof = !desc->eob;
Bryan Schumaker's avatar
Bryan Schumaker committed
1119

1120
	kunmap_local(array);
1121 1122
	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
			(unsigned long long)desc->dir_cookie);
Linus Torvalds's avatar
Linus Torvalds committed
1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136
}

/*
 * If we cannot find a cookie in our cache, we suspect that this is
 * because it points to a deleted file, so we ask the server to return
 * whatever it thinks is the next entry. We then feed this to filldir.
 * If all goes well, we should then be able to find our way round the
 * cache on the next call to readdir_search_pagecache();
 *
 * NOTE: we cannot add the anonymous page to the pagecache because
 *	 the data it contains might not be page aligned. Besides,
 *	 we should already have a complete representation of the
 *	 directory in the page cache by the time we get here.
 */
1137
static int uncached_readdir(struct nfs_readdir_descriptor *desc)
Linus Torvalds's avatar
Linus Torvalds committed
1138
{
1139 1140
	struct page	**arrays;
	size_t		i, sz = 512;
1141
	__be32		verf[NFS_DIR_VERIFIER_SIZE];
1142
	int		status = -ENOMEM;
Linus Torvalds's avatar
Linus Torvalds committed
1143

1144
	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n",
1145
			(unsigned long long)desc->dir_cookie);
Linus Torvalds's avatar
Linus Torvalds committed
1146

1147 1148 1149 1150 1151
	arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL);
	if (!arrays)
		goto out;
	arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL);
	if (!arrays[0])
Linus Torvalds's avatar
Linus Torvalds committed
1152
		goto out;
Bryan Schumaker's avatar
Bryan Schumaker committed
1153

1154
	desc->page_index = 0;
1155
	desc->cache_entry_index = 0;
1156
	desc->last_cookie = desc->dir_cookie;
1157
	desc->page_index_max = 0;
1158

1159 1160 1161
	trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie,
				   -1, desc->dtsize);

1162
	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
1163 1164 1165 1166
	if (status < 0) {
		trace_nfs_readdir_uncached_done(file_inode(desc->file), status);
		goto out_free;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1167

1168
	for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
1169
		desc->page = arrays[i];
1170
		nfs_do_filldir(desc, verf);
1171 1172
	}
	desc->page = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1173

1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184
	/*
	 * Grow the dtsize if we have to go back for more pages,
	 * or shrink it if we're reading too many.
	 */
	if (!desc->eof) {
		if (!desc->eob)
			nfs_grow_dtsize(desc);
		else if (desc->buffer_fills == 1 &&
			 i < (desc->page_index_max >> 1))
			nfs_shrink_dtsize(desc);
	}
1185
out_free:
1186 1187 1188
	for (i = 0; i < sz && arrays[i]; i++)
		nfs_readdir_page_array_free(arrays[i]);
out:
1189 1190
	if (!nfs_readdir_use_cookie(desc->file))
		nfs_readdir_rewind_search(desc);
1191
	desc->page_index_max = -1;
1192 1193
	kfree(arrays);
	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
Linus Torvalds's avatar
Linus Torvalds committed
1194 1195 1196
	return status;
}

1197
/*
 * Decide whether nfs_readdir() should force the cached readdir pages to
 * be cleared.
 *
 * Returns false at position 0, when desc->plus is not set, or when
 * @cache_misses has not exceeded NFS_READDIR_CACHE_MISS_THRESHOLD and
 * @force_clear is not already set. Otherwise emits the
 * nfs_readdir_force_readdirplus tracepoint for @inode and returns true.
 */
static bool nfs_readdir_handle_cache_misses(struct inode *inode,
					    struct nfs_readdir_descriptor *desc,
					    unsigned int cache_misses,
					    bool force_clear)
{
	if (desc->ctx->pos == 0 || !desc->plus)
		return false;
	if (cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD && !force_clear)
		return false;
	trace_nfs_readdir_force_readdirplus(inode);
	return true;
}

1210 1211 1212
/* The file offset position represents the dirent entry number.  A
   last cookie cache takes care of the common case of reading the
   whole directory.
Linus Torvalds's avatar
Linus Torvalds committed
1213
 */
Al Viro's avatar
Al Viro committed
1214
static int nfs_readdir(struct file *file, struct dir_context *ctx)
Linus Torvalds's avatar
Linus Torvalds committed
1215
{
Miklos Szeredi's avatar
Miklos Szeredi committed
1216
	struct dentry	*dentry = file_dentry(file);
1217
	struct inode	*inode = d_inode(dentry);
1218
	struct nfs_inode *nfsi = NFS_I(inode);
Al Viro's avatar
Al Viro committed
1219
	struct nfs_open_dir_context *dir_ctx = file->private_data;
1220
	struct nfs_readdir_descriptor *desc;
1221
	unsigned int cache_hits, cache_misses;
1222
	bool force_clear;
1223
	int res;
Linus Torvalds's avatar
Linus Torvalds committed
1224

1225 1226
	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
			file, (long long)ctx->pos);
1227 1228
	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);

Linus Torvalds's avatar
Linus Torvalds committed
1229
	/*
Al Viro's avatar
Al Viro committed
1230
	 * ctx->pos points to the dirent entry number.
1231
	 * *desc->dir_cookie has the cookie for the next entry. We have
1232 1233
	 * to either find the entry with the appropriate number or
	 * revalidate the cookie.
Linus Torvalds's avatar
Linus Torvalds committed
1234
	 */
1235
	nfs_revalidate_mapping(inode, file->f_mapping);
1236 1237 1238 1239

	res = -ENOMEM;
	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
1240
		goto out;
1241 1242
	desc->file = file;
	desc->ctx = ctx;
1243
	desc->page_index_max = -1;
1244

1245 1246
	spin_lock(&file->f_lock);
	desc->dir_cookie = dir_ctx->dir_cookie;
1247
	desc->page_index = dir_ctx->page_index;
1248
	desc->last_cookie = dir_ctx->last_cookie;
1249
	desc->attr_gencount = dir_ctx->attr_gencount;
1250
	desc->eof = dir_ctx->eof;
1251
	nfs_set_dtsize(desc, dir_ctx->dtsize);
1252
	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
1253 1254
	cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
	cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
1255
	force_clear = dir_ctx->force_clear;
1256
	spin_unlock(&file->f_lock);
1257

1258 1259 1260 1261 1262
	if (desc->eof) {
		res = 0;
		goto out_free;
	}

1263
	desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
1264 1265 1266
	force_clear = nfs_readdir_handle_cache_misses(inode, desc, cache_misses,
						      force_clear);
	desc->clear_cache = force_clear;
1267

Trond Myklebust's avatar
Trond Myklebust committed
1268
	do {
Linus Torvalds's avatar
Linus Torvalds committed
1269
		res = readdir_search_pagecache(desc);
1270

Linus Torvalds's avatar
Linus Torvalds committed
1271
		if (res == -EBADCOOKIE) {
1272
			res = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1273
			/* This means either end of directory */
1274
			if (desc->dir_cookie && !desc->eof) {
Linus Torvalds's avatar
Linus Torvalds committed
1275
				/* Or that the server has 'lost' a cookie */
Al Viro's avatar
Al Viro committed
1276
				res = uncached_readdir(desc);
1277
				if (res == 0)
Linus Torvalds's avatar
Linus Torvalds committed
1278
					continue;
1279 1280
				if (res == -EBADCOOKIE || res == -ENOTSYNC)
					res = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1281 1282 1283 1284 1285
			}
			break;
		}
		if (res == -ETOOSMALL && desc->plus) {
			nfs_zap_caches(inode);
1286 1287
			desc->plus = false;
			desc->eof = false;
Linus Torvalds's avatar
Linus Torvalds committed
1288 1289 1290 1291 1292
			continue;
		}
		if (res < 0)
			break;

1293
		nfs_do_filldir(desc, nfsi->cookieverf);
1294
		nfs_readdir_page_unlock_and_put_cached(desc);
1295 1296
		if (desc->page_index == desc->page_index_max)
			desc->clear_cache = force_clear;
1297
	} while (!desc->eob && !desc->eof);
1298 1299 1300

	spin_lock(&file->f_lock);
	dir_ctx->dir_cookie = desc->dir_cookie;
1301
	dir_ctx->last_cookie = desc->last_cookie;
1302
	dir_ctx->attr_gencount = desc->attr_gencount;
1303
	dir_ctx->page_index = desc->page_index;
1304
	dir_ctx->force_clear = force_clear;
1305
	dir_ctx->eof = desc->eof;
1306
	dir_ctx->dtsize = desc->dtsize;
1307
	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
1308
	spin_unlock(&file->f_lock);
1309
out_free:
1310 1311
	kfree(desc);

1312
out:
1313
	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
Chuck Lever's avatar
Chuck Lever committed
1314
	return res;
Linus Torvalds's avatar
Linus Torvalds committed
1315 1316
}

1317
/*
 * Seek within an open directory.
 *
 * Only SEEK_SET and SEEK_CUR are accepted; any other @whence, or a
 * resulting negative offset, yields -EINVAL. SEEK_CUR with a zero offset
 * returns the current position without taking filp->f_lock.
 *
 * When the position actually changes, the cached search state in the
 * open context is reset: page_index goes back to 0, eof is cleared, and
 * the cookies are set to @offset if cookies are used as file positions
 * or to 0 otherwise.
 *
 * Returns the new offset on success.
 */
static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
{
	struct nfs_open_dir_context *dir_ctx = filp->private_data;

	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
			filp, offset, whence);

	switch (whence) {
	default:
		return -EINVAL;
	case SEEK_SET:
		if (offset < 0)
			return -EINVAL;
		spin_lock(&filp->f_lock);
		break;
	case SEEK_CUR:
		if (offset == 0)
			return filp->f_pos;
		spin_lock(&filp->f_lock);
		offset += filp->f_pos;
		if (offset < 0) {
			spin_unlock(&filp->f_lock);
			return -EINVAL;
		}
		break;
	}
	/* filp->f_lock is held from here on. */
	if (offset != filp->f_pos) {
		filp->f_pos = offset;
		dir_ctx->page_index = 0;
		if (!nfs_readdir_use_cookie(filp)) {
			dir_ctx->dir_cookie = 0;
			dir_ctx->last_cookie = 0;
		} else {
			dir_ctx->dir_cookie = offset;
			dir_ctx->last_cookie = offset;
		}
		dir_ctx->eof = false;
	}
	spin_unlock(&filp->f_lock);
	return offset;
}

Linus Torvalds's avatar
Linus Torvalds committed
1358 1359 1360 1361
/*
 * All directory operations under NFS are synchronous, so fsync()
 * is a dummy operation.
 */
1362 1363
static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
			 int datasync)
Linus Torvalds's avatar
Linus Torvalds committed
1364
{
1365
	dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
Chuck Lever's avatar
Chuck Lever committed
1366

1367
	nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC);
Linus Torvalds's avatar
Linus Torvalds committed
1368 1369 1370
	return 0;
}

1371 1372
/**
 * nfs_force_lookup_revalidate - Mark the directory as having changed
1373
 * @dir: pointer to directory inode
1374 1375 1376 1377 1378
 *
 * This forces the revalidation code in nfs_lookup_revalidate() to do a
 * full lookup on all child dentries of 'dir' whenever a change occurs
 * on the server that might have invalidated our dcache.
 *
1379 1380 1381
 * Note that we reserve bit '0' as a tag to let us know when a dentry
 * was revalidated while holding a delegation on its inode.
 *
1382 1383 1384 1385
 * The caller should be holding dir->i_lock
 */
void nfs_force_lookup_revalidate(struct inode *dir)
{
1386
	NFS_I(dir)->cache_change_attribute += 2;
1387
}
1388
EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
1389

1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428
/**
 * nfs_verify_change_attribute - Detects NFS remote directory changes
 * @dir: pointer to parent directory inode
 * @verf: previously saved change attribute
 *
 * Bit 0 of @verf is ignored in the comparison.
 *
 * Return "false" if the verifier doesn't match the change attribute.
 * This would usually indicate that the directory contents have changed on
 * the server, and that any dentries need revalidating.
 */
static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf)
{
	unsigned long untagged = verf & ~1UL;

	return untagged == nfs_save_change_attribute(dir);
}

/* Tag the verifier at @verf as "delegated" by setting bit 0. */
static void nfs_set_verifier_delegated(unsigned long *verf)
{
	*verf = *verf | 1UL;
}

#if IS_ENABLED(CONFIG_NFS_V4)
/* Remove the "delegated" tag from the verifier at @verf by clearing bit 0. */
static void nfs_unset_verifier_delegated(unsigned long *verf)
{
	*verf = *verf & ~1UL;
}
#endif /* IS_ENABLED(CONFIG_NFS_V4) */

/* Report whether bit 0 (the "delegated" tag) is set in @verf. */
static bool nfs_test_verifier_delegated(unsigned long verf)
{
	return (verf & 1UL) != 0;
}

/* Report whether the verifier stored in dentry->d_time carries the tag. */
static bool nfs_verifier_is_delegated(struct dentry *dentry)
{
	unsigned long verf = dentry->d_time;

	return nfs_test_verifier_delegated(verf);
}

static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
{
	struct inode *inode = d_inode(dentry);
1429
	struct inode *dir = d_inode(dentry->d_parent);
1430

1431 1432
	if (!nfs_verify_change_attribute(dir, verf))
		return;
1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484
	if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
		nfs_set_verifier_delegated(&verf);
	dentry->d_time = verf;
}

/**
 * nfs_set_verifier - save a parent directory verifier in the dentry
 * @dentry: pointer to dentry
 * @verf: verifier to save
 *
 * Takes @dentry's d_lock and stores the parent directory verifier through
 * nfs_set_verifier_locked(). If the inode has a delegation, the dentry is
 * also tagged as having been revalidated while holding a delegation, so
 * that we know we don't have to look it up again after a directory change.
 */
void nfs_set_verifier(struct dentry *dentry, unsigned long verf)
{
	spin_lock(&dentry->d_lock);
	nfs_set_verifier_locked(dentry, verf);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL_GPL(nfs_set_verifier);

#if IS_ENABLED(CONFIG_NFS_V4)
/**
 * nfs_clear_verifier_delegated - clear the dir verifier delegation tag
 * @inode: pointer to inode
 *
 * Iterates through the dentries in the inode alias list and clears
 * the tag used to indicate that the dentry has been revalidated
 * while holding a delegation.
 * This function is intended for use when the delegation is being
 * returned or revoked.
 */
void nfs_clear_verifier_delegated(struct inode *inode)
{
	struct dentry *alias;

	if (!inode)
		return;
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
		spin_lock(&alias->d_lock);
		nfs_unset_verifier_delegated(&alias->d_time);
		spin_unlock(&alias->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
#endif /* IS_ENABLED(CONFIG_NFS_V4) */

1485 1486 1487 1488 1489 1490 1491 1492
/*
 * Check whether the verifier saved in dentry->d_time still matches @dir.
 *
 * For a negative dentry on a server with NFS_CAP_CASE_INSENSITIVE, d_time
 * is compared against the directory's raw i_version; in every other case
 * the check is delegated to nfs_verify_change_attribute().
 *
 * Returns non-zero on a match, 0 otherwise.
 */
static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
{
	if (!nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) ||
	    !d_really_is_negative(dentry))
		return nfs_verify_change_attribute(dir, dentry->d_time);

	return dentry->d_time == inode_peek_iversion_raw(dir);
}

Linus Torvalds's avatar
Linus Torvalds committed
1493 1494 1495 1496
/*
 * A check for whether or not the parent directory has changed.
 * In the case it has, we assume that the dentries are untrustworthy
 * and may need to be looked up again.
1497
 * If rcu_walk prevents us from performing a full check, return 0.
Linus Torvalds's avatar
Linus Torvalds committed
1498
 */
1499 1500
static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
			      int rcu_walk)
Linus Torvalds's avatar
Linus Torvalds committed
1501 1502 1503
{
	if (IS_ROOT(dentry))
		return 1;
1504 1505
	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
		return 0;
1506
	if (!nfs_dentry_verify_change(dir, dentry))
1507 1508
		return 0;
	/* Revalidate nfsi->cache_change_attribute before we declare a match */
1509 1510 1511 1512 1513 1514
	if (nfs_mapping_need_revalidate_inode(dir)) {
		if (rcu_walk)
			return 0;
		if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
			return 0;
	}
1515
	if (!nfs_dentry_verify_change(dir, dentry))
1516 1517
		return 0;
	return 1;
Linus Torvalds's avatar
Linus Torvalds committed
1518 1519
}

1520 1521 1522 1523
/*
 * Use intent information to check whether or not we're going to do
 * an O_EXCL create using this path component.
 */
1524
static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
1525 1526 1527
{
	if (NFS_PROTO(dir)->version == 2)
		return 0;
1528
	return flags & LOOKUP_EXCL;
1529 1530
}

1531 1532 1533 1534 1535 1536 1537 1538
/*
 * Inode and filehandle revalidation for lookups.
 *
 * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
 * or if the intent information indicates that we're about to open this
 * particular file and the "nocto" mount flag is not set.
 *
 */
1539
static
1540
int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
Linus Torvalds's avatar
Linus Torvalds committed
1541 1542
{
	struct nfs_server *server = NFS_SERVER(inode);
1543
	int ret;
Linus Torvalds's avatar
Linus Torvalds committed
1544

1545
	if (IS_AUTOMOUNT(inode))
1546
		return 0;
1547 1548 1549 1550 1551 1552 1553

	if (flags & LOOKUP_OPEN) {
		switch (inode->i_mode & S_IFMT) {
		case S_IFREG:
			/* A NFSv4 OPEN will revalidate later */
			if (server->caps & NFS_CAP_ATOMIC_OPEN)
				goto out;
1554
			fallthrough;
1555 1556 1557 1558 1559 1560 1561 1562
		case S_IFDIR:
			if (server->flags & NFS_MOUNT_NOCTO)
				break;
			/* NFS close-to-open cache consistency validation */
			goto out_force;
		}
	}

1563
	/* VFS wants an on-the-wire revalidation */
1564
	if (flags & LOOKUP_REVAL)
1565
		goto out_force;
1566
out:
1567 1568 1569 1570 1571 1572
	if (inode->i_nlink > 0 ||
	    (inode->i_nlink == 0 &&
	     test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(inode)->flags)))
		return 0;
	else
		return -ESTALE;
Linus Torvalds's avatar
Linus Torvalds committed
1573
out_force:
1574 1575
	if (flags & LOOKUP_RCU)
		return -ECHILD;
1576 1577 1578 1579
	ret = __nfs_revalidate_inode(server, inode);
	if (ret != 0)
		return ret;
	goto out;
Linus Torvalds's avatar
Linus Torvalds committed
1580 1581
}

1582 1583 1584
static void nfs_mark_dir_for_revalidate(struct inode *inode)
{
	spin_lock(&inode->i_lock);
1585
	nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE);
1586 1587 1588
	spin_unlock(&inode->i_lock);
}

Linus Torvalds's avatar
Linus Torvalds committed
1589 1590 1591 1592 1593 1594
/*
 * We judge how long we want to trust negative
 * dentries by looking at the parent inode mtime.
 *
 * If parent mtime has changed, we revalidate, else we wait for a
 * period corresponding to the parent's attribute cache timeout value.
1595 1596 1597
 *
 * If LOOKUP_RCU prevents us from performing a full check, return 1
 * suggesting a reval is needed.
1598 1599 1600
 *
 * Note that when creating a new file, or looking up a rename target,
 * then it shouldn't be necessary to revalidate a negative dentry.
Linus Torvalds's avatar
Linus Torvalds committed
1601 1602 1603
 */
static inline
int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1604
		       unsigned int flags)
Linus Torvalds's avatar
Linus Torvalds committed
1605
{
1606
	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
Linus Torvalds's avatar
Linus Torvalds committed
1607
		return 0;
1608 1609
	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
		return 1;
1610 1611 1612
	/* Case insensitive server? Revalidate negative dentries */
	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
		return 1;
1613
	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
Linus Torvalds's avatar
Linus Torvalds committed
1614 1615
}

1616 1617 1618 1619 1620 1621
/*
 * Common exit point for the lookup revalidation helpers.
 *
 * @error is the verdict so far: 1 (valid), 0 (invalid) or a negative
 * errno. A verdict of 0 is turned into 1 when the dentry has an inode and
 * is the root of its tree (see below). The final value is traced and
 * returned; any other value passes through unchanged.
 */
static int
nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
			   struct inode *inode, int error)
{
	switch (error) {
	case 1:
		break;
	case 0:
		/*
		 * We can't d_drop the root of a disconnected tree:
		 * its d_hash is on the s_anon list and d_drop() would hide
		 * it from shrink_dcache_for_unmount(), leading to busy
		 * inodes on unmount and further oopses.
		 */
		if (inode && IS_ROOT(dentry))
			error = 1;
		break;
	}
	trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
	return error;
}

/*
 * Revalidate a negative dentry.
 *
 * Reports the dentry as valid (1) when nfs_neg_need_reval() says no
 * revalidation is needed. Otherwise returns -ECHILD under LOOKUP_RCU, or
 * reports the dentry as invalid (0).
 */
static int
nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
			       unsigned int flags)
{
	if (!nfs_neg_need_reval(dir, dentry, flags))
		return nfs_lookup_revalidate_done(dir, dentry, NULL, 1);

	if (flags & LOOKUP_RCU)
		return -ECHILD;

	return nfs_lookup_revalidate_done(dir, dentry, NULL, 0);
}

/*
 * Revalidate a dentry whose verifier carries the delegation tag: refresh
 * the verifier from the directory's current change attribute and report
 * the dentry as valid.
 */
static int
nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
				struct inode *inode)
{
	unsigned long dir_verifier = nfs_save_change_attribute(dir);

	nfs_set_verifier(dentry, dir_verifier);
	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}

1659 1660 1661
static int nfs_lookup_revalidate_dentry(struct inode *dir,
					struct dentry *dentry,
					struct inode *inode, unsigned int flags)
1662 1663 1664
{
	struct nfs_fh *fhandle;
	struct nfs_fattr *fattr;
1665
	unsigned long dir_verifier;
1666 1667
	int ret;

1668 1669
	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);

1670 1671
	ret = -ENOMEM;
	fhandle = nfs_alloc_fhandle();
1672 1673
	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
	if (fhandle == NULL || fattr == NULL)
1674 1675
		goto out;

1676
	dir_verifier = nfs_save_change_attribute(dir);
1677
	ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
1678
	if (ret < 0) {
1679 1680 1681
		switch (ret) {
		case -ESTALE:
		case -ENOENT:
1682
			ret = 0;
1683 1684 1685 1686 1687
			break;
		case -ETIMEDOUT:
			if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
				ret = 1;
		}
1688 1689
		goto out;
	}
1690 1691 1692 1693

	/* Request help from readdirplus */
	nfs_lookup_advise_force_readdirplus(dir, flags);

1694 1695 1696 1697 1698 1699
	ret = 0;
	if (nfs_compare_fh(NFS_FH(inode), fhandle))
		goto out;
	if (nfs_refresh_inode(inode, fattr) < 0)
		goto out;

1700
	nfs_setsecurity(inode, fattr);
1701
	nfs_set_verifier(dentry, dir_verifier);
1702 1703 1704 1705 1706

	ret = 1;
out:
	nfs_free_fattr(fattr);
	nfs_free_fhandle(fhandle);
1707 1708 1709 1710 1711

	/*
	 * If the lookup failed despite the dentry change attribute being
	 * a match, then we should revalidate the directory cache.
	 */
1712
	if (!ret && nfs_dentry_verify_change(dir, dentry))
1713
		nfs_mark_dir_for_revalidate(dir);
1714 1715 1716
	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
}

Linus Torvalds's avatar
Linus Torvalds committed
1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727
/*
 * This is called every time the dcache has a lookup hit,
 * and we should check whether we can really trust that
 * lookup.
 *
 * NOTE! The hit can be a negative hit too, don't assume
 * we have an inode!
 *
 * If the parent directory is seen to have changed, we throw out the
 * cached dentry and do a new lookup.
 */
1728 1729 1730
static int
nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
			 unsigned int flags)
Linus Torvalds's avatar
Linus Torvalds committed
1731 1732 1733 1734
{
	struct inode *inode;
	int error;

1735
	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1736
	inode = d_inode(dentry);
Linus Torvalds's avatar
Linus Torvalds committed
1737

1738 1739
	if (!inode)
		return nfs_lookup_revalidate_negative(dir, dentry, flags);
Linus Torvalds's avatar
Linus Torvalds committed
1740 1741

	if (is_bad_inode(inode)) {
1742 1743
		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
				__func__, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
1744 1745 1746
		goto out_bad;
	}

1747 1748 1749 1750
	if ((flags & LOOKUP_RENAME_TARGET) && d_count(dentry) < 2 &&
	    nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
		goto out_bad;

1751
	if (nfs_verifier_is_delegated(dentry))
1752
		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1753

Linus Torvalds's avatar
Linus Torvalds committed
1754
	/* Force a full look up iff the parent directory has changed */
1755
	if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
1756
	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1757 1758 1759
		error = nfs_lookup_verify_inode(inode, flags);
		if (error) {
			if (error == -ESTALE)
1760
				nfs_mark_dir_for_revalidate(dir);
1761
			goto out_bad;
1762
		}
Linus Torvalds's avatar
Linus Torvalds committed
1763 1764 1765
		goto out_valid;
	}

1766 1767 1768
	if (flags & LOOKUP_RCU)
		return -ECHILD;

Linus Torvalds's avatar
Linus Torvalds committed
1769 1770 1771
	if (NFS_STALE(inode))
		goto out_bad;

1772
	return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
1773 1774 1775 1776 1777 1778 1779
out_valid:
	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
out_bad:
	if (flags & LOOKUP_RCU)
		return -ECHILD;
	return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
}
1780

1781
static int
1782 1783
__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
			int (*reval)(struct inode *, struct dentry *, unsigned int))
1784 1785 1786 1787
{
	struct dentry *parent;
	struct inode *dir;
	int ret;
1788

1789
	if (flags & LOOKUP_RCU) {
1790 1791
		if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
			return -ECHILD;
1792 1793 1794 1795
		parent = READ_ONCE(dentry->d_parent);
		dir = d_inode_rcu(parent);
		if (!dir)
			return -ECHILD;
1796
		ret = reval(dir, dentry, flags);
1797
		if (parent != READ_ONCE(dentry->d_parent))
1798
			return -ECHILD;
1799
	} else {
1800 1801 1802
		/* Wait for unlink to complete */
		wait_var_event(&dentry->d_fsdata,
			       dentry->d_fsdata != NFS_FSDATA_BLOCKED);
1803
		parent = dget_parent(dentry);
1804
		ret = reval(d_inode(parent), dentry, flags);
1805
		dput(parent);
Linus Torvalds's avatar
Linus Torvalds committed
1806
	}
1807
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
1808 1809
}

1810 1811 1812 1813 1814
/*
 * Dentry revalidation entry point: runs nfs_do_lookup_revalidate() through
 * the common __nfs_lookup_revalidate() driver.
 */
static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
	return __nfs_lookup_revalidate(dentry, flags,
				       nfs_do_lookup_revalidate);
}

1815
/*
1816
 * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
1817 1818 1819 1820 1821 1822 1823 1824 1825
 * when we don't really care about the dentry name. This is called when a
 * pathwalk ends on a dentry that was not found via a normal lookup in the
 * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
 *
 * In this situation, we just want to verify that the inode itself is OK
 * since the dentry might have changed on the server.
 */
static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
1826
	struct inode *inode = d_inode(dentry);
1827
	int error = 0;
1828 1829 1830 1831 1832 1833 1834

	/*
	 * I believe we can only get a negative dentry here in the case of a
	 * procfs-style symlink. Just assume it's correct for now, but we may
	 * eventually need to do something more here.
	 */
	if (!inode) {
1835 1836
		dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
				__func__, dentry);
1837 1838 1839 1840
		return 1;
	}

	if (is_bad_inode(inode)) {
1841 1842
		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
				__func__, dentry);
1843 1844 1845
		return 0;
	}

1846
	error = nfs_lookup_verify_inode(inode, flags);
1847 1848 1849 1850 1851
	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
			__func__, inode->i_ino, error ? "invalid" : "valid");
	return !error;
}

Linus Torvalds's avatar
Linus Torvalds committed
1852 1853 1854
/*
 * This is called from dput() when d_count is going to 0.
 */
Nick Piggin's avatar
Nick Piggin committed
1855
static int nfs_dentry_delete(const struct dentry *dentry)
Linus Torvalds's avatar
Linus Torvalds committed
1856
{
1857 1858
	dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
		dentry, dentry->d_flags);
Linus Torvalds's avatar
Linus Torvalds committed
1859

1860
	/* Unhash any dentry with a stale inode */
1861
	if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
1862 1863
		return 1;

Linus Torvalds's avatar
Linus Torvalds committed
1864 1865 1866 1867
	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
		/* Unhash it, so that ->d_iput() would be called */
		return 1;
	}
1868
	if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
Linus Torvalds's avatar
Linus Torvalds committed
1869 1870 1871 1872 1873 1874 1875 1876
		/* Unhash it, so that ancestors of killed async unlink
		 * files will be cleaned up during umount */
		return 1;
	}
	return 0;

}

1877
/* Ensure that we revalidate inode->i_nlink */
1878 1879 1880
static void nfs_drop_nlink(struct inode *inode)
{
	spin_lock(&inode->i_lock);
1881
	/* drop the inode if we're reasonably sure this is the last link */
1882 1883 1884
	if (inode->i_nlink > 0)
		drop_nlink(inode);
	NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
1885 1886
	nfs_set_cache_invalid(
		inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
1887
			       NFS_INO_INVALID_NLINK);
1888 1889 1890
	spin_unlock(&inode->i_lock);
}

Linus Torvalds's avatar
Linus Torvalds committed
1891 1892 1893 1894 1895 1896 1897
/*
 * Called when the dentry loses inode.
 * We use it to clean up silly-renamed files: for a dentry flagged
 * DCACHE_NFSFS_RENAMED the pending unlink is completed and the link count
 * dropped before the inode reference is released.
 */
static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
{
	if (!(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
		iput(inode);
		return;
	}

	nfs_complete_unlink(dentry, inode);
	nfs_drop_nlink(inode);
	iput(inode);
}

1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914
/*
 * ->d_release hook: free the cached devname value kept in ->d_fsdata, if it
 * survived that far. A dentry that still carries ->d_fsdata while flagged
 * DCACHE_NFSFS_RENAMED triggers a warning and is not freed here.
 */
static void nfs_d_release(struct dentry *dentry)
{
	if (likely(!dentry->d_fsdata))
		return;

	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
		WARN_ON(1);
		return;
	}

	kfree(dentry->d_fsdata);
}

Al Viro's avatar
Al Viro committed
1915
const struct dentry_operations nfs_dentry_operations = {
Linus Torvalds's avatar
Linus Torvalds committed
1916
	.d_revalidate	= nfs_lookup_revalidate,
1917
	.d_weak_revalidate	= nfs_weak_revalidate,
Linus Torvalds's avatar
Linus Torvalds committed
1918 1919
	.d_delete	= nfs_dentry_delete,
	.d_iput		= nfs_dentry_iput,
1920
	.d_automount	= nfs_d_automount,
1921
	.d_release	= nfs_d_release,
Linus Torvalds's avatar
Linus Torvalds committed
1922
};
1923
EXPORT_SYMBOL_GPL(nfs_dentry_operations);
Linus Torvalds's avatar
Linus Torvalds committed
1924

1925
struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
Linus Torvalds's avatar
Linus Torvalds committed
1926 1927 1928
{
	struct dentry *res;
	struct inode *inode = NULL;
1929 1930
	struct nfs_fh *fhandle = NULL;
	struct nfs_fattr *fattr = NULL;
1931
	unsigned long dir_verifier;
Linus Torvalds's avatar
Linus Torvalds committed
1932 1933
	int error;

1934
	dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
1935
	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
Linus Torvalds's avatar
Linus Torvalds committed
1936

1937 1938
	if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
		return ERR_PTR(-ENAMETOOLONG);
Linus Torvalds's avatar
Linus Torvalds committed
1939

1940 1941 1942 1943
	/*
	 * If we're doing an exclusive create, optimize away the lookup
	 * but don't hash the dentry.
	 */
1944
	if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
1945
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1946

1947 1948
	res = ERR_PTR(-ENOMEM);
	fhandle = nfs_alloc_fhandle();
1949
	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir));
1950 1951 1952
	if (fhandle == NULL || fattr == NULL)
		goto out;

1953
	dir_verifier = nfs_save_change_attribute(dir);
1954
	trace_nfs_lookup_enter(dir, dentry, flags);
1955
	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
1956 1957 1958
	if (error == -ENOENT) {
		if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
			dir_verifier = inode_peek_iversion_raw(dir);
Linus Torvalds's avatar
Linus Torvalds committed
1959
		goto no_entry;
1960
	}
Linus Torvalds's avatar
Linus Torvalds committed
1961 1962
	if (error < 0) {
		res = ERR_PTR(error);
1963
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
1964
	}
1965
	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
Namhyung Kim's avatar
Namhyung Kim committed
1966
	res = ERR_CAST(inode);
1967
	if (IS_ERR(res))
1968
		goto out;
1969

1970
	/* Notify readdir to use READDIRPLUS */
1971
	nfs_lookup_advise_force_readdirplus(dir, flags);
1972

Linus Torvalds's avatar
Linus Torvalds committed
1973
no_entry:
1974
	res = d_splice_alias(inode, dentry);
1975 1976
	if (res != NULL) {
		if (IS_ERR(res))
1977
			goto out;
Linus Torvalds's avatar
Linus Torvalds committed
1978
		dentry = res;
1979
	}
1980
	nfs_set_verifier(dentry, dir_verifier);
Linus Torvalds's avatar
Linus Torvalds committed
1981
out:
1982
	trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res));
1983 1984
	nfs_free_fattr(fattr);
	nfs_free_fhandle(fhandle);
Linus Torvalds's avatar
Linus Torvalds committed
1985 1986
	return res;
}
1987
EXPORT_SYMBOL_GPL(nfs_lookup);
Linus Torvalds's avatar
Linus Torvalds committed
1988

1989 1990 1991 1992 1993 1994 1995 1996
/*
 * Prune the dentry aliases of @inode when the server is flagged
 * NFS_CAP_CASE_INSENSITIVE. Does nothing for a NULL @inode or for any
 * other server.
 */
void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
{
	if (!inode)
		return;

	/* Case insensitive server? Revalidate dentries */
	if (!nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
		return;

	d_prune_aliases(inode);
}
EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);

1997
#if IS_ENABLED(CONFIG_NFS_V4)
1998
static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
Linus Torvalds's avatar
Linus Torvalds committed
1999

Al Viro's avatar
Al Viro committed
2000
const struct dentry_operations nfs4_dentry_operations = {
2001
	.d_revalidate	= nfs4_lookup_revalidate,
2002
	.d_weak_revalidate	= nfs_weak_revalidate,
Linus Torvalds's avatar
Linus Torvalds committed
2003 2004
	.d_delete	= nfs_dentry_delete,
	.d_iput		= nfs_dentry_iput,
2005
	.d_automount	= nfs_d_automount,
2006
	.d_release	= nfs_d_release,
Linus Torvalds's avatar
Linus Torvalds committed
2007
};
2008
EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
Linus Torvalds's avatar
Linus Torvalds committed
2009

2010
static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
2011
{
2012
	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
2013 2014 2015 2016
}

/*
 * Open callback handed to finish_open() by nfs_finish_open(): notifies
 * fscache about the open and always reports success.
 */
static int do_open(struct inode *inode, struct file *filp)
{
	nfs_fscache_open_file(inode, filp);

	return 0;
}

Al Viro's avatar
Al Viro committed
2021 2022
static int nfs_finish_open(struct nfs_open_context *ctx,
			   struct dentry *dentry,
2023
			   struct file *file, unsigned open_flags)
2024
{
2025 2026
	int err;

2027
	err = finish_open(file, dentry, do_open);
Al Viro's avatar
Al Viro committed
2028
	if (err)
Al Viro's avatar
Al Viro committed
2029
		goto out;
2030
	if (S_ISREG(file_inode(file)->i_mode))
2031 2032
		nfs_file_set_open_context(file, ctx);
	else
2033
		err = -EOPENSTALE;
2034
out:
Al Viro's avatar
Al Viro committed
2035
	return err;
2036 2037
}

2038 2039
int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
		    struct file *file, unsigned open_flags,
2040
		    umode_t mode)
Linus Torvalds's avatar
Linus Torvalds committed
2041
{
2042
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
2043
	struct nfs_open_context *ctx;
2044 2045
	struct dentry *res;
	struct iattr attr = { .ia_valid = ATTR_OPEN };
2046
	struct inode *inode;
2047
	unsigned int lookup_flags = 0;
2048
	unsigned long dir_verifier;
2049
	bool switched = false;
2050
	int created = 0;
2051
	int err;
Linus Torvalds's avatar
Linus Torvalds committed
2052

2053
	/* Expect a negative dentry */
2054
	BUG_ON(d_inode(dentry));
2055

2056
	dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
2057
			dir->i_sb->s_id, dir->i_ino, dentry);
Chuck Lever's avatar
Chuck Lever committed
2058

2059 2060 2061 2062
	err = nfs_check_flags(open_flags);
	if (err)
		return err;

2063 2064
	/* NFS only supports OPEN on regular files */
	if ((open_flags & O_DIRECTORY)) {
2065
		if (!d_in_lookup(dentry)) {
2066 2067 2068 2069 2070
			/*
			 * Hashed negative dentry with O_DIRECTORY: dentry was
			 * revalidated and is fine, no need to perform lookup
			 * again
			 */
Al Viro's avatar
Al Viro committed
2071
			return -ENOENT;
2072
		}
2073
		lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
Linus Torvalds's avatar
Linus Torvalds committed
2074
		goto no_open;
2075
	}
Linus Torvalds's avatar
Linus Torvalds committed
2076

2077
	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
Al Viro's avatar
Al Viro committed
2078
		return -ENAMETOOLONG;
2079

2080
	if (open_flags & O_CREAT) {
2081 2082 2083 2084 2085
		struct nfs_server *server = NFS_SERVER(dir);

		if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
			mode &= ~current_umask();

2086
		attr.ia_valid |= ATTR_MODE;
2087
		attr.ia_mode = mode;
2088
	}
2089 2090 2091
	if (open_flags & O_TRUNC) {
		attr.ia_valid |= ATTR_SIZE;
		attr.ia_size = 0;
2092 2093
	}

2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104
	if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
		d_drop(dentry);
		switched = true;
		dentry = d_alloc_parallel(dentry->d_parent,
					  &dentry->d_name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
		if (unlikely(!d_in_lookup(dentry)))
			return finish_no_open(file, dentry);
	}

2105
	ctx = create_nfs_open_context(dentry, open_flags, file);
2106 2107
	err = PTR_ERR(ctx);
	if (IS_ERR(ctx))
Al Viro's avatar
Al Viro committed
2108
		goto out;
2109

2110
	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
2111 2112 2113
	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
	if (created)
		file->f_mode |= FMODE_CREATED;
2114
	if (IS_ERR(inode)) {
2115
		err = PTR_ERR(inode);
2116
		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
2117
		put_nfs_open_context(ctx);
2118
		d_drop(dentry);
2119 2120
		switch (err) {
		case -ENOENT:
2121
			d_splice_alias(NULL, dentry);
2122 2123 2124 2125 2126
			if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
				dir_verifier = inode_peek_iversion_raw(dir);
			else
				dir_verifier = nfs_save_change_attribute(dir);
			nfs_set_verifier(dentry, dir_verifier);
2127 2128 2129 2130 2131 2132
			break;
		case -EISDIR:
		case -ENOTDIR:
			goto no_open;
		case -ELOOP:
			if (!(open_flags & O_NOFOLLOW))
2133
				goto no_open;
2134
			break;
Linus Torvalds's avatar
Linus Torvalds committed
2135
			/* case -EINVAL: */
2136 2137
		default:
			break;
Linus Torvalds's avatar
Linus Torvalds committed
2138
		}
Al Viro's avatar
Al Viro committed
2139
		goto out;
2140
	}
2141
	file->f_mode |= FMODE_CAN_ODIRECT;
2142

2143
	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
2144
	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
2145
	put_nfs_open_context(ctx);
Al Viro's avatar
Al Viro committed
2146
out:
2147 2148 2149 2150
	if (unlikely(switched)) {
		d_lookup_done(dentry);
		dput(dentry);
	}
Al Viro's avatar
Al Viro committed
2151
	return err;
2152

Linus Torvalds's avatar
Linus Torvalds committed
2153
no_open:
2154
	res = nfs_lookup(dir, dentry, lookup_flags);
2155 2156 2157
	if (!res) {
		inode = d_inode(dentry);
		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
2158
		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
2159
			res = ERR_PTR(-ENOTDIR);
2160 2161
		else if (inode && S_ISREG(inode->i_mode))
			res = ERR_PTR(-EOPENSTALE);
2162 2163 2164
	} else if (!IS_ERR(res)) {
		inode = d_inode(res);
		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
2165
		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
2166 2167
			dput(res);
			res = ERR_PTR(-ENOTDIR);
2168 2169 2170
		} else if (inode && S_ISREG(inode->i_mode)) {
			dput(res);
			res = ERR_PTR(-EOPENSTALE);
2171 2172
		}
	}
2173 2174 2175 2176 2177 2178 2179
	if (switched) {
		d_lookup_done(dentry);
		if (!res)
			res = dentry;
		else
			dput(dentry);
	}
2180
	if (IS_ERR(res))
2181
		return PTR_ERR(res);
Al Viro's avatar
Al Viro committed
2182
	return finish_no_open(file, res);
Linus Torvalds's avatar
Linus Torvalds committed
2183
}
2184
EXPORT_SYMBOL_GPL(nfs_atomic_open);
Linus Torvalds's avatar
Linus Torvalds committed
2185

2186 2187 2188
static int
nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
			  unsigned int flags)
Linus Torvalds's avatar
Linus Torvalds committed
2189
{
Nick Piggin's avatar
Nick Piggin committed
2190
	struct inode *inode;
Linus Torvalds's avatar
Linus Torvalds committed
2191

2192
	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
2193
		goto full_reval;
2194
	if (d_mountpoint(dentry))
2195
		goto full_reval;
2196

2197
	inode = d_inode(dentry);
2198

Linus Torvalds's avatar
Linus Torvalds committed
2199 2200 2201
	/* We can't create new files in nfs_open_revalidate(), so we
	 * optimize away revalidation of negative dentries.
	 */
2202 2203 2204
	if (inode == NULL)
		goto full_reval;

2205
	if (nfs_verifier_is_delegated(dentry))
2206
		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
2207

Linus Torvalds's avatar
Linus Torvalds committed
2208 2209
	/* NFS only supports OPEN on regular files */
	if (!S_ISREG(inode->i_mode))
2210 2211
		goto full_reval;

Linus Torvalds's avatar
Linus Torvalds committed
2212
	/* We cannot do exclusive creation on a positive dentry */
2213 2214 2215 2216 2217 2218
	if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
		goto reval_dentry;

	/* Check if the directory changed */
	if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
		goto reval_dentry;
Linus Torvalds's avatar
Linus Torvalds committed
2219

2220
	/* Let f_op->open() actually open (and revalidate) the file */
2221 2222 2223 2224
	return 1;
reval_dentry:
	if (flags & LOOKUP_RCU)
		return -ECHILD;
2225
	return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
2226

2227 2228 2229
full_reval:
	return nfs_do_lookup_revalidate(dir, dentry, flags);
}
2230

2231 2232 2233 2234
/*
 * ->d_revalidate hook used by nfs4_dentry_operations: runs the common
 * __nfs_lookup_revalidate() driver with nfs4_do_lookup_revalidate() as the
 * per-dentry revalidation callback and returns its result.
 */
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
	int ret;

	ret = __nfs_lookup_revalidate(dentry, flags, nfs4_do_lookup_revalidate);
	return ret;
}

Linus Torvalds's avatar
Linus Torvalds committed
2237 2238
#endif /* CONFIG_NFS_V4 */

2239 2240
struct dentry *
nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
2241
				struct nfs_fattr *fattr)
Linus Torvalds's avatar
Linus Torvalds committed
2242
{
2243
	struct dentry *parent = dget_parent(dentry);
2244
	struct inode *dir = d_inode(parent);
Linus Torvalds's avatar
Linus Torvalds committed
2245
	struct inode *inode;
2246
	struct dentry *d;
2247
	int error;
Linus Torvalds's avatar
Linus Torvalds committed
2248

2249 2250
	d_drop(dentry);

Linus Torvalds's avatar
Linus Torvalds committed
2251
	if (fhandle->size == 0) {
2252
		error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
Linus Torvalds's avatar
Linus Torvalds committed
2253
		if (error)
2254
			goto out_error;
Linus Torvalds's avatar
Linus Torvalds committed
2255
	}
2256
	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
Linus Torvalds's avatar
Linus Torvalds committed
2257 2258
	if (!(fattr->valid & NFS_ATTR_FATTR)) {
		struct nfs_server *server = NFS_SB(dentry->d_sb);
2259
		error = server->nfs_client->rpc_ops->getattr(server, fhandle,
2260
				fattr, NULL);
Linus Torvalds's avatar
Linus Torvalds committed
2261
		if (error < 0)
2262
			goto out_error;
Linus Torvalds's avatar
Linus Torvalds committed
2263
	}
2264
	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
2265
	d = d_splice_alias(inode, dentry);
2266 2267
out:
	dput(parent);
2268
	return d;
2269
out_error:
2270 2271 2272 2273 2274 2275 2276 2277 2278
	d = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL_GPL(nfs_add_or_obtain);

/*
 * Code common to create, mkdir, and mknod.
 *
 * Thin wrapper around nfs_add_or_obtain() for callers that only need a
 * status: returns the error encoded in its result, or 0 after dropping the
 * returned dentry reference.
 */
int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
				struct nfs_fattr *fattr)
{
	struct dentry *d = nfs_add_or_obtain(dentry, fhandle, fattr);

	/* Callers don't care about the dentry itself */
	if (!IS_ERR(d))
		dput(d);
	return PTR_ERR_OR_ZERO(d);
}
EXPORT_SYMBOL_GPL(nfs_instantiate);
Linus Torvalds's avatar
Linus Torvalds committed
2292 2293 2294 2295 2296 2297 2298

/*
 * Following a failed create operation, we drop the dentry rather
 * than retain a negative dentry. This avoids a problem in the event
 * that the operation succeeded on the server, but an error in the
 * reply path made it appear to have failed.
 */
2299 2300
int nfs_create(struct user_namespace *mnt_userns, struct inode *dir,
	       struct dentry *dentry, umode_t mode, bool excl)
Linus Torvalds's avatar
Linus Torvalds committed
2301 2302
{
	struct iattr attr;
Al Viro's avatar
Al Viro committed
2303
	int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
Linus Torvalds's avatar
Linus Torvalds committed
2304 2305
	int error;

2306
	dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
2307
			dir->i_sb->s_id, dir->i_ino, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2308 2309 2310 2311

	attr.ia_mode = mode;
	attr.ia_valid = ATTR_MODE;

2312
	trace_nfs_create_enter(dir, dentry, open_flags);
2313
	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
2314
	trace_nfs_create_exit(dir, dentry, open_flags, error);
Linus Torvalds's avatar
Linus Torvalds committed
2315 2316 2317 2318 2319 2320 2321
	if (error != 0)
		goto out_err;
	return 0;
out_err:
	d_drop(dentry);
	return error;
}
2322
EXPORT_SYMBOL_GPL(nfs_create);
Linus Torvalds's avatar
Linus Torvalds committed
2323 2324 2325 2326

/*
 * See comments for nfs_proc_create regarding failed operations.
 */
2327
int
2328 2329
nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
	  struct dentry *dentry, umode_t mode, dev_t rdev)
Linus Torvalds's avatar
Linus Torvalds committed
2330 2331 2332 2333
{
	struct iattr attr;
	int status;

2334
	dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
2335
			dir->i_sb->s_id, dir->i_ino, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2336 2337 2338 2339

	attr.ia_mode = mode;
	attr.ia_valid = ATTR_MODE;

2340
	trace_nfs_mknod_enter(dir, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2341
	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
2342
	trace_nfs_mknod_exit(dir, dentry, status);
Linus Torvalds's avatar
Linus Torvalds committed
2343 2344 2345 2346 2347 2348 2349
	if (status != 0)
		goto out_err;
	return 0;
out_err:
	d_drop(dentry);
	return status;
}
2350
EXPORT_SYMBOL_GPL(nfs_mknod);
Linus Torvalds's avatar
Linus Torvalds committed
2351 2352 2353 2354

/*
 * See comments for nfs_proc_create regarding failed operations.
 */
2355 2356
int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
	      struct dentry *dentry, umode_t mode)
Linus Torvalds's avatar
Linus Torvalds committed
2357 2358 2359 2360
{
	struct iattr attr;
	int error;

2361
	dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
2362
			dir->i_sb->s_id, dir->i_ino, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2363 2364 2365 2366

	attr.ia_valid = ATTR_MODE;
	attr.ia_mode = mode | S_IFDIR;

2367
	trace_nfs_mkdir_enter(dir, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2368
	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
2369
	trace_nfs_mkdir_exit(dir, dentry, error);
Linus Torvalds's avatar
Linus Torvalds committed
2370 2371 2372 2373 2374 2375 2376
	if (error != 0)
		goto out_err;
	return 0;
out_err:
	d_drop(dentry);
	return error;
}
2377
EXPORT_SYMBOL_GPL(nfs_mkdir);
Linus Torvalds's avatar
Linus Torvalds committed
2378

2379 2380
/*
 * React to the server reporting -ENOENT for @dentry: if simple_positive()
 * holds for it, remove it from the dcache with d_delete().
 */
static void nfs_dentry_handle_enoent(struct dentry *dentry)
{
	if (!simple_positive(dentry))
		return;

	d_delete(dentry);
}

2385 2386 2387 2388 2389
static void nfs_dentry_remove_handle_error(struct inode *dir,
					   struct dentry *dentry, int error)
{
	switch (error) {
	case -ENOENT:
2390 2391
		if (d_really_is_positive(dentry))
			d_delete(dentry);
2392 2393
		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
		break;
2394
	case 0:
2395
		nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
2396 2397 2398 2399
		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
	}
}

2400
int nfs_rmdir(struct inode *dir, struct dentry *dentry)
Linus Torvalds's avatar
Linus Torvalds committed
2401 2402 2403
{
	int error;

2404
	dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
2405
			dir->i_sb->s_id, dir->i_ino, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2406

2407
	trace_nfs_rmdir_enter(dir, dentry);
2408
	if (d_really_is_positive(dentry)) {
Al Viro's avatar
Al Viro committed
2409
		down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2410 2411 2412 2413
		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
		/* Ensure the VFS deletes this inode */
		switch (error) {
		case 0:
2414
			clear_nlink(d_inode(dentry));
2415 2416 2417 2418
			break;
		case -ENOENT:
			nfs_dentry_handle_enoent(dentry);
		}
Al Viro's avatar
Al Viro committed
2419
		up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2420 2421
	} else
		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2422
	nfs_dentry_remove_handle_error(dir, dentry, error);
2423
	trace_nfs_rmdir_exit(dir, dentry, error);
Linus Torvalds's avatar
Linus Torvalds committed
2424 2425 2426

	return error;
}
2427
EXPORT_SYMBOL_GPL(nfs_rmdir);
Linus Torvalds's avatar
Linus Torvalds committed
2428 2429 2430 2431 2432 2433 2434 2435 2436 2437

/*
 * Remove a file after making sure there are no pending writes,
 * and after checking that the file has only one user. 
 *
 * We invalidate the attribute cache and free the inode prior to the operation
 * to avoid possible races if the server reuses the inode.
 */
static int nfs_safe_remove(struct dentry *dentry)
{
2438 2439
	struct inode *dir = d_inode(dentry->d_parent);
	struct inode *inode = d_inode(dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2440 2441
	int error = -EBUSY;
		
2442
	dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2443 2444 2445 2446 2447 2448 2449

	/* If the dentry was sillyrenamed, we simply call d_delete() */
	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
		error = 0;
		goto out;
	}

2450
	trace_nfs_remove_enter(dir, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2451
	if (inode != NULL) {
2452
		error = NFS_PROTO(dir)->remove(dir, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2453
		if (error == 0)
2454
			nfs_drop_nlink(inode);
Linus Torvalds's avatar
Linus Torvalds committed
2455
	} else
2456
		error = NFS_PROTO(dir)->remove(dir, dentry);
2457 2458
	if (error == -ENOENT)
		nfs_dentry_handle_enoent(dentry);
2459
	trace_nfs_remove_exit(dir, dentry, error);
Linus Torvalds's avatar
Linus Torvalds committed
2460 2461 2462 2463 2464 2465 2466 2467 2468
out:
	return error;
}

/*  We do silly rename. In case sillyrename() returns -EBUSY, the inode
 *  belongs to an active ".nfs..." file and we return -EBUSY.
 *
 *  If sillyrename() returns 0, we do nothing, otherwise we unlink.
 */
2469
int nfs_unlink(struct inode *dir, struct dentry *dentry)
Linus Torvalds's avatar
Linus Torvalds committed
2470 2471 2472
{
	int error;

2473
	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
2474
		dir->i_ino, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2475

2476
	trace_nfs_unlink_enter(dir, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2477
	spin_lock(&dentry->d_lock);
2478 2479
	if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
					     &NFS_I(d_inode(dentry))->flags)) {
Linus Torvalds's avatar
Linus Torvalds committed
2480
		spin_unlock(&dentry->d_lock);
Trond Myklebust's avatar
Trond Myklebust committed
2481
		/* Start asynchronous writeout of the inode */
2482
		write_inode_now(d_inode(dentry), 0);
Linus Torvalds's avatar
Linus Torvalds committed
2483
		error = nfs_sillyrename(dir, dentry);
2484
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
2485
	}
2486 2487 2488 2489 2490 2491 2492
	/* We must prevent any concurrent open until the unlink
	 * completes.  ->d_revalidate will wait for ->d_fsdata
	 * to clear.  We set it here to ensure no lookup succeeds until
	 * the unlink is complete on the server.
	 */
	error = -ETXTBSY;
	if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
2493 2494
	    WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) {
		spin_unlock(&dentry->d_lock);
2495
		goto out;
2496
	}
2497 2498
	/* old devname */
	kfree(dentry->d_fsdata);
2499 2500
	dentry->d_fsdata = NFS_FSDATA_BLOCKED;

Linus Torvalds's avatar
Linus Torvalds committed
2501 2502
	spin_unlock(&dentry->d_lock);
	error = nfs_safe_remove(dentry);
2503
	nfs_dentry_remove_handle_error(dir, dentry, error);
2504 2505
	dentry->d_fsdata = NULL;
	wake_up_var(&dentry->d_fsdata);
2506 2507
out:
	trace_nfs_unlink_exit(dir, dentry, error);
Linus Torvalds's avatar
Linus Torvalds committed
2508 2509
	return error;
}
2510
EXPORT_SYMBOL_GPL(nfs_unlink);
Linus Torvalds's avatar
Linus Torvalds committed
2511

2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526
/*
 * To create a symbolic link, most file systems instantiate a new inode,
 * add a page to it containing the path, then write it out to the disk
 * using prepare_write/commit_write.
 *
 * Unfortunately the NFS client can't create the in-core inode first
 * because it needs a file handle to create an in-core inode (see
 * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
 * symlink request has completed on the server.
 *
 * So instead we allocate a raw page, copy the symname into it, then do
 * the SYMLINK request with the page as the buffer.  If it succeeds, we
 * now have a new file handle and can instantiate an in-core NFS inode
 * and move the raw page into its mapping.
 */
2527 2528
int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
		struct dentry *dentry, const char *symname)
Linus Torvalds's avatar
Linus Torvalds committed
2529
{
2530 2531
	struct page *page;
	char *kaddr;
Linus Torvalds's avatar
Linus Torvalds committed
2532
	struct iattr attr;
2533
	unsigned int pathlen = strlen(symname);
Linus Torvalds's avatar
Linus Torvalds committed
2534 2535
	int error;

2536
	dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
2537
		dir->i_ino, dentry, symname);
Linus Torvalds's avatar
Linus Torvalds committed
2538

2539 2540
	if (pathlen > PAGE_SIZE)
		return -ENAMETOOLONG;
Linus Torvalds's avatar
Linus Torvalds committed
2541

2542 2543
	attr.ia_mode = S_IFLNK | S_IRWXUGO;
	attr.ia_valid = ATTR_MODE;
Linus Torvalds's avatar
Linus Torvalds committed
2544

2545
	page = alloc_page(GFP_USER);
2546
	if (!page)
2547 2548
		return -ENOMEM;

2549
	kaddr = page_address(page);
2550 2551 2552 2553
	memcpy(kaddr, symname, pathlen);
	if (pathlen < PAGE_SIZE)
		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);

2554
	trace_nfs_symlink_enter(dir, dentry);
2555
	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
2556
	trace_nfs_symlink_exit(dir, dentry, error);
2557
	if (error != 0) {
2558
		dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
2559
			dir->i_sb->s_id, dir->i_ino,
2560
			dentry, symname, error);
Linus Torvalds's avatar
Linus Torvalds committed
2561
		d_drop(dentry);
2562 2563 2564 2565
		__free_page(page);
		return error;
	}

2566 2567
	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));

2568 2569 2570 2571
	/*
	 * No big deal if we can't add this page to the page cache here.
	 * READLINK will get the missing page from the server if needed.
	 */
2572
	if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
2573 2574 2575
							GFP_KERNEL)) {
		SetPageUptodate(page);
		unlock_page(page);
2576 2577 2578 2579
		/*
		 * add_to_page_cache_lru() grabs an extra page refcount.
		 * Drop it here to avoid leaking this page later.
		 */
2580
		put_page(page);
2581 2582 2583 2584
	} else
		__free_page(page);

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
2585
}
2586
EXPORT_SYMBOL_GPL(nfs_symlink);
Linus Torvalds's avatar
Linus Torvalds committed
2587

2588
int
Linus Torvalds's avatar
Linus Torvalds committed
2589 2590
nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
2591
	struct inode *inode = d_inode(old_dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2592 2593
	int error;

2594 2595
	dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
		old_dentry, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2596

2597
	trace_nfs_link_enter(inode, dir, dentry);
2598
	d_drop(dentry);
2599 2600
	if (S_ISREG(inode->i_mode))
		nfs_sync_inode(inode);
Linus Torvalds's avatar
Linus Torvalds committed
2601
	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
2602
	if (error == 0) {
2603
		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
Al Viro's avatar
Al Viro committed
2604
		ihold(inode);
2605
		d_add(dentry, inode);
2606
	}
2607
	trace_nfs_link_exit(inode, dir, dentry, error);
Linus Torvalds's avatar
Linus Torvalds committed
2608 2609
	return error;
}
2610
EXPORT_SYMBOL_GPL(nfs_link);
Linus Torvalds's avatar
Linus Torvalds committed
2611

2612 2613 2614 2615 2616 2617 2618 2619 2620
/*
 * Clear the ->d_fsdata marker on the rename target (data->new_dentry) and
 * wake any task sleeping on that field via wait_var_event(), such as the
 * wait in __nfs_lookup_revalidate(). @task is not used.
 */
static void
nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
{
	struct dentry *new_dentry = data->new_dentry;

	/* Clear the marker before waking, so woken waiters see it cleared */
	new_dentry->d_fsdata = NULL;
	wake_up_var(&new_dentry->d_fsdata);
}

Linus Torvalds's avatar
Linus Torvalds committed
2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644
/*
 * RENAME
 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
 * different file handle for the same inode after a rename (e.g. when
 * moving to a different directory). A fail-safe method to do so would
 * be to look up old_dir/old_name, create a link to new_dir/new_name and
 * rename the old file using the sillyrename stuff. This way, the original
 * file in old_dir will go away when the last process iput()s the inode.
 *
 * FIXED.
 * 
 * It actually works quite well. One needs to have the possibility for
 * at least one ".nfs..." file in each directory the file ever gets
 * moved or linked to which happens automagically with the new
 * implementation that only depends on the dcache stuff instead of
 * using the inode layer
 *
 * Unfortunately, things are a little more complicated than indicated
 * above. For a cross-directory move, we want to make sure we can get
 * rid of the old inode after the operation.  This means there must be
 * no pending writes (if it's a file), and the use count must be 1.
 * If these conditions are met, we can drop the dentries before doing
 * the rename.
 */
2645 2646 2647
int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
	       struct dentry *old_dentry, struct inode *new_dir,
	       struct dentry *new_dentry, unsigned int flags)
Linus Torvalds's avatar
Linus Torvalds committed
2648
{
2649 2650
	struct inode *old_inode = d_inode(old_dentry);
	struct inode *new_inode = d_inode(new_dentry);
2651
	struct dentry *dentry = NULL;
2652
	struct rpc_task *task;
2653
	bool must_unblock = false;
Linus Torvalds's avatar
Linus Torvalds committed
2654 2655
	int error = -EBUSY;

2656 2657 2658
	if (flags)
		return -EINVAL;

2659 2660
	dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
		 old_dentry, new_dentry,
Al Viro's avatar
Al Viro committed
2661
		 d_count(new_dentry));
Linus Torvalds's avatar
Linus Torvalds committed
2662

2663
	trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
Linus Torvalds's avatar
Linus Torvalds committed
2664
	/*
2665 2666 2667 2668
	 * For non-directories, check whether the target is busy and if so,
	 * make a copy of the dentry and then do a silly-rename. If the
	 * silly-rename succeeds, the copied dentry is hashed and becomes
	 * the new target.
Linus Torvalds's avatar
Linus Torvalds committed
2669
	 */
2670
	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
2671 2672 2673 2674
		/* We must prevent any concurrent open until the unlink
		 * completes.  ->d_revalidate will wait for ->d_fsdata
		 * to clear.  We set it here to ensure no lookup succeeds until
		 * the unlink is complete on the server.
2675
		 */
2676 2677 2678 2679 2680 2681 2682 2683
		error = -ETXTBSY;
		if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
		    WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
			goto out;
		if (new_dentry->d_fsdata) {
			/* old devname */
			kfree(new_dentry->d_fsdata);
			new_dentry->d_fsdata = NULL;
2684
		}
Linus Torvalds's avatar
Linus Torvalds committed
2685

2686
		spin_lock(&new_dentry->d_lock);
Al Viro's avatar
Al Viro committed
2687
		if (d_count(new_dentry) > 2) {
2688 2689
			int err;

2690 2691
			spin_unlock(&new_dentry->d_lock);

2692 2693 2694 2695 2696 2697 2698 2699
			/* copy the target dentry's name */
			dentry = d_alloc(new_dentry->d_parent,
					 &new_dentry->d_name);
			if (!dentry)
				goto out;

			/* silly-rename the existing target ... */
			err = nfs_sillyrename(new_dir, new_dentry);
2700
			if (err)
2701
				goto out;
2702 2703 2704

			new_dentry = dentry;
			new_inode = NULL;
2705 2706 2707 2708
		} else {
			new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
			must_unblock = true;
			spin_unlock(&new_dentry->d_lock);
2709
		}
2710

2711
	}
Linus Torvalds's avatar
Linus Torvalds committed
2712

2713 2714
	if (S_ISREG(old_inode->i_mode))
		nfs_sync_inode(old_inode);
2715 2716
	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
				must_unblock ? nfs_unblock_rename : NULL);
2717 2718 2719 2720 2721 2722
	if (IS_ERR(task)) {
		error = PTR_ERR(task);
		goto out;
	}

	error = rpc_wait_for_completion_task(task);
2723 2724 2725 2726 2727
	if (error != 0) {
		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
		smp_wmb();
	} else
2728 2729
		error = task->tk_status;
	rpc_put_task(task);
2730 2731 2732 2733
	/* Ensure the inode attributes are revalidated */
	if (error == 0) {
		spin_lock(&old_inode->i_lock);
		NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
2734 2735 2736
		nfs_set_cache_invalid(old_inode, NFS_INO_INVALID_CHANGE |
							 NFS_INO_INVALID_CTIME |
							 NFS_INO_REVAL_FORCED);
2737 2738
		spin_unlock(&old_inode->i_lock);
	}
Linus Torvalds's avatar
Linus Torvalds committed
2739
out:
2740 2741
	trace_nfs_rename_exit(old_dir, old_dentry,
			new_dir, new_dentry, error);
2742 2743 2744 2745 2746 2747 2748 2749 2750 2751
	if (!error) {
		if (new_inode != NULL)
			nfs_drop_nlink(new_inode);
		/*
		 * The d_move() should be here instead of in an async RPC completion
		 * handler because we need the proper locks to move the dentry.  If
		 * we're interrupted by a signal, the async RPC completion handler
		 * should mark the directories for revalidation.
		 */
		d_move(old_dentry, new_dentry);
2752
		nfs_set_verifier(old_dentry,
2753 2754 2755 2756
					nfs_save_change_attribute(new_dir));
	} else if (error == -ENOENT)
		nfs_dentry_handle_enoent(old_dentry);

Linus Torvalds's avatar
Linus Torvalds committed
2757 2758 2759 2760 2761
	/* new dentry created? */
	if (dentry)
		dput(dentry);
	return error;
}
2762
EXPORT_SYMBOL_GPL(nfs_rename);
Linus Torvalds's avatar
Linus Torvalds committed
2763

2764 2765 2766 2767
static DEFINE_SPINLOCK(nfs_access_lru_lock);
static LIST_HEAD(nfs_access_lru_list);
static atomic_long_t nfs_access_nr_entries;

2768
static unsigned long nfs_access_max_cachesize = 4*1024*1024;
2769 2770 2771
module_param(nfs_access_max_cachesize, ulong, 0644);
MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");

2772 2773
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
2774
	put_group_info(entry->group_info);
2775
	kfree_rcu(entry, rcu_head);
2776
	smp_mb__before_atomic();
2777
	atomic_long_dec(&nfs_access_nr_entries);
2778
	smp_mb__after_atomic();
2779 2780
}

2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791
/*
 * Free every access cache entry queued on @head (linked through ->lru),
 * leaving the list empty.
 */
static void nfs_access_free_list(struct list_head *head)
{
	struct list_head *pos;

	/* head->next == head means the list has been drained */
	while ((pos = head->next) != head) {
		struct nfs_access_entry *entry =
			list_entry(pos, struct nfs_access_entry, lru);

		list_del(pos);
		nfs_access_free_entry(entry);
	}
}

2792 2793
static unsigned long
nfs_do_access_cache_scan(unsigned int nr_to_scan)
2794 2795
{
	LIST_HEAD(head);
2796
	struct nfs_inode *nfsi, *next;
2797
	struct nfs_access_entry *cache;
2798
	long freed = 0;
2799

2800
	spin_lock(&nfs_access_lru_lock);
2801
	list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
2802 2803 2804 2805
		struct inode *inode;

		if (nr_to_scan-- == 0)
			break;
2806
		inode = &nfsi->vfs_inode;
2807 2808 2809 2810 2811 2812 2813
		spin_lock(&inode->i_lock);
		if (list_empty(&nfsi->access_cache_entry_lru))
			goto remove_lru_entry;
		cache = list_entry(nfsi->access_cache_entry_lru.next,
				struct nfs_access_entry, lru);
		list_move(&cache->lru, &head);
		rb_erase(&cache->rb_node, &nfsi->access_cache);
2814
		freed++;
2815 2816 2817 2818 2819 2820
		if (!list_empty(&nfsi->access_cache_entry_lru))
			list_move_tail(&nfsi->access_cache_inode_lru,
					&nfs_access_lru_list);
		else {
remove_lru_entry:
			list_del_init(&nfsi->access_cache_inode_lru);
2821
			smp_mb__before_atomic();
2822
			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
2823
			smp_mb__after_atomic();
2824
		}
2825
		spin_unlock(&inode->i_lock);
2826 2827
	}
	spin_unlock(&nfs_access_lru_lock);
2828
	nfs_access_free_list(&head);
2829 2830 2831
	return freed;
}

2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843
/*
 * Shrinker scan callback for the access cache.  Scanning only proceeds
 * when the allocation context carries every GFP_KERNEL bit; otherwise
 * SHRINK_STOP is returned.  On a scan, the return value is the number
 * of entries evicted.
 */
unsigned long
nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	int budget = sc->nr_to_scan;

	if ((sc->gfp_mask & GFP_KERNEL) == GFP_KERNEL)
		return nfs_do_access_cache_scan(budget);
	return SHRINK_STOP;
}


2844 2845 2846
unsigned long
nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
2847
	return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2848 2849
}

2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865
/*
 * If the global entry count exceeds nfs_access_max_cachesize, run one
 * eviction pass over at most 100 inodes (fewer when the excess itself
 * is smaller than that).
 */
static void
nfs_access_cache_enforce_limit(void)
{
	long nr = atomic_long_read(&nfs_access_nr_entries);
	unsigned long excess;

	/* A negative reading or a count within the limit needs no work. */
	if (nr < 0 || nr <= nfs_access_max_cachesize)
		return;

	excess = nr - nfs_access_max_cachesize;
	nfs_do_access_cache_scan(excess < 100 ? excess : 100);
}

2866
static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
Linus Torvalds's avatar
Linus Torvalds committed
2867
{
2868
	struct rb_root *root_node = &nfsi->access_cache;
2869
	struct rb_node *n;
2870 2871 2872 2873 2874 2875
	struct nfs_access_entry *entry;

	/* Unhook entries from the cache */
	while ((n = rb_first(root_node)) != NULL) {
		entry = rb_entry(n, struct nfs_access_entry, rb_node);
		rb_erase(n, root_node);
2876
		list_move(&entry->lru, head);
2877 2878
	}
	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
Linus Torvalds's avatar
Linus Torvalds committed
2879 2880
}

2881
void nfs_access_zap_cache(struct inode *inode)
Linus Torvalds's avatar
Linus Torvalds committed
2882
{
2883 2884 2885 2886
	LIST_HEAD(head);

	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
		return;
2887
	/* Remove from global LRU init */
2888 2889
	spin_lock(&nfs_access_lru_lock);
	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
2890 2891
		list_del_init(&NFS_I(inode)->access_cache_inode_lru);

2892
	spin_lock(&inode->i_lock);
2893 2894 2895 2896
	__nfs_access_zap_cache(NFS_I(inode), &head);
	spin_unlock(&inode->i_lock);
	spin_unlock(&nfs_access_lru_lock);
	nfs_access_free_list(&head);
2897
}
2898
EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
Linus Torvalds's avatar
Linus Torvalds committed
2899

2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936
/*
 * Three-way comparison between credential @a and cache entry @b, used
 * to order and look up entries in the per-inode access rbtree.
 *
 * Keys are compared in this order: fsuid, fsgid, then the supplementary
 * group list (a NULL list sorts first, then by group count, then
 * element by element).  Returns -1, 0 or 1.
 */
static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
{
	struct group_info *ga, *gb;
	int order;
	int i;

	order = uid_gt(a->fsuid, b->fsuid) - uid_lt(a->fsuid, b->fsuid);
	if (order != 0)
		return order;

	order = gid_gt(a->fsgid, b->fsgid) - gid_lt(a->fsgid, b->fsgid);
	if (order != 0)
		return order;

	ga = a->group_info;
	gb = b->group_info;
	if (ga == gb)
		return 0;
	if (ga == NULL)
		return -1;
	if (gb == NULL)
		return 1;

	if (ga->ngroups != gb->ngroups)
		return ga->ngroups < gb->ngroups ? -1 : 1;

	for (i = 0; i < ga->ngroups; i++) {
		order = gid_gt(ga->gid[i], gb->gid[i]) -
			gid_lt(ga->gid[i], gb->gid[i]);
		if (order != 0)
			return order;
	}
	return 0;
}

2937
static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
2938 2939 2940 2941
{
	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;

	while (n != NULL) {
2942 2943
		struct nfs_access_entry *entry =
			rb_entry(n, struct nfs_access_entry, rb_node);
2944
		int cmp = access_cmp(cred, entry);
2945

2946
		if (cmp < 0)
2947
			n = n->rb_left;
2948
		else if (cmp > 0)
2949 2950 2951
			n = n->rb_right;
		else
			return entry;
Linus Torvalds's avatar
Linus Torvalds committed
2952
	}
2953 2954 2955
	return NULL;
}

2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973
/*
 * Walk up the ->real_parent chain from @task for as long as the parent's
 * credentials compare equal to @cred under cred_fscmp(), and return the
 * start_time of the last task reached.  The walk also stops at a task
 * that is its own parent.  Runs under rcu_read_lock().
 */
static u64 nfs_access_login_time(const struct task_struct *task,
				 const struct cred *cred)
{
	const struct task_struct *parent;
	u64 started;

	rcu_read_lock();
	parent = rcu_dereference(task->real_parent);
	while (parent != task && cred_fscmp(parent->cred, cred) == 0) {
		task = parent;
		parent = rcu_dereference(task->real_parent);
	}
	started = task->start_time;
	rcu_read_unlock();

	return started;
}

2974
/*
 * Look up the cached access mask for @cred under the inode's i_lock.
 *
 * Returns 0 with *@mask filled in on a hit.  Returns -ENOENT when the
 * access cache is flagged invalid (it is zapped first), when no entry
 * exists, or when the entry's timestamp is older than the caller's
 * login time.  Returns -ECHILD when a revalidation would be needed but
 * @may_block is false, or the error from __nfs_revalidate_inode().
 */
static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	u64 login_time = nfs_access_login_time(current, cred);
	struct nfs_access_entry *hit;
	bool may_revalidate = true;
	int err;

	spin_lock(&inode->i_lock);
	for (;;) {
		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) {
			spin_unlock(&inode->i_lock);
			nfs_access_zap_cache(inode);
			return -ENOENT;
		}

		hit = nfs_access_search_rbtree(inode, cred);
		if (hit == NULL) {
			err = -ENOENT;
			goto out;
		}

		/*
		 * Found an entry, is our attribute cache valid?  Only one
		 * revalidation is attempted; on the second pass the entry
		 * is used as it stands.
		 */
		if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS) ||
		    !may_revalidate)
			break;

		if (!may_block) {
			err = -ECHILD;
			goto out;
		}

		/* Revalidation may sleep, so drop the lock around it. */
		spin_unlock(&inode->i_lock);
		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
		if (err)
			return err;
		spin_lock(&inode->i_lock);
		may_revalidate = false;
	}

	/* Entries recorded before the login time are not honoured. */
	if ((s64)(login_time - hit->timestamp) > 0) {
		err = -ENOENT;
		goto out;
	}

	*mask = hit->mask;
	list_move_tail(&hit->lru, &nfsi->access_cache_entry_lru);
	err = 0;
out:
	spin_unlock(&inode->i_lock);
	return err;
}

3020
/*
 * Lockless fast path for the access cache lookup.
 *
 * Only the entry at the tail of the inode's entry LRU (the most recently
 * returned one) is examined, under rcu_read_lock() and without taking
 * i_lock.  Returns 0 with *@mask filled in on a usable hit, -ECHILD
 * otherwise; nfs_access_get_cached() then falls back to the locked
 * lookup.
 *
 * An entry whose timestamp predates the caller's login time is rejected
 * here as well.  The locked lookup applies the same test, and since this
 * path is tried first, skipping it would let such an entry be honoured
 * whenever it happens to sit at the LRU tail.
 */
static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	u64 login_time = nfs_access_login_time(current, cred);
	struct nfs_access_entry *cache;
	int err = -ECHILD;
	struct list_head *lh;

	rcu_read_lock();
	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
		goto out;
	lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
	cache = list_entry(lh, struct nfs_access_entry, lru);
	/* An empty list points back at its own head: no entry to check. */
	if (lh == &nfsi->access_cache_entry_lru ||
	    access_cmp(cred, cache) != 0)
		cache = NULL;
	if (cache == NULL)
		goto out;
	/* Same login-time test as the locked lookup. */
	if ((s64)(login_time - cache->timestamp) > 0)
		goto out;
	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
		goto out;
	*mask = cache->mask;
	err = 0;
out:
	rcu_read_unlock();
	return err;
}

3049 3050
/*
 * Fetch the cached access mask for @cred on @inode.  The lockless
 * lookup is tried first; if it does not produce a hit, the locked
 * lookup is used.  Returns 0 with *@mask set on a hit, otherwise the
 * error from the locked lookup.
 */
int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
			  u32 *mask, bool may_block)
{
	int status = nfs_access_get_cached_rcu(inode, cred, mask);

	if (status == 0)
		return 0;
	return nfs_access_get_cached_locked(inode, cred, mask, may_block);
}
EXPORT_SYMBOL_GPL(nfs_access_get_cached);

3063 3064 3065
static void nfs_access_add_rbtree(struct inode *inode,
				  struct nfs_access_entry *set,
				  const struct cred *cred)
3066
{
3067 3068
	struct nfs_inode *nfsi = NFS_I(inode);
	struct rb_root *root_node = &nfsi->access_cache;
3069 3070 3071
	struct rb_node **p = &root_node->rb_node;
	struct rb_node *parent = NULL;
	struct nfs_access_entry *entry;
3072
	int cmp;
3073 3074 3075 3076 3077

	spin_lock(&inode->i_lock);
	while (*p != NULL) {
		parent = *p;
		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
3078
		cmp = access_cmp(cred, entry);
3079

3080
		if (cmp < 0)
3081
			p = &parent->rb_left;
3082
		else if (cmp > 0)
3083 3084 3085 3086
			p = &parent->rb_right;
		else
			goto found;
	}
3087
	set->timestamp = ktime_get_ns();
3088 3089
	rb_link_node(&set->rb_node, parent, p);
	rb_insert_color(&set->rb_node, root_node);
3090
	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
3091
	spin_unlock(&inode->i_lock);
3092 3093 3094
	return;
found:
	rb_replace_node(parent, &set->rb_node, root_node);
3095 3096
	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
	list_del(&entry->lru);
3097 3098 3099 3100
	spin_unlock(&inode->i_lock);
	nfs_access_free_entry(entry);
}

3101 3102
void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
			  const struct cred *cred)
3103 3104 3105 3106 3107
{
	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
	if (cache == NULL)
		return;
	RB_CLEAR_NODE(&cache->rb_node);
3108 3109 3110
	cache->fsuid = cred->fsuid;
	cache->fsgid = cred->fsgid;
	cache->group_info = get_group_info(cred->group_info);
Linus Torvalds's avatar
Linus Torvalds committed
3111
	cache->mask = set->mask;
3112

3113 3114 3115 3116 3117
	/* The above field assignments must be visible
	 * before this item appears on the lru.  We cannot easily
	 * use rcu_assign_pointer, so just force the memory barrier.
	 */
	smp_wmb();
3118
	nfs_access_add_rbtree(inode, cache, cred);
3119 3120

	/* Update accounting */
3121
	smp_mb__before_atomic();
3122
	atomic_long_inc(&nfs_access_nr_entries);
3123
	smp_mb__after_atomic();
3124 3125

	/* Add inode to global LRU list */
3126
	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
3127
		spin_lock(&nfs_access_lru_lock);
3128 3129 3130
		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
					&nfs_access_lru_list);
3131 3132
		spin_unlock(&nfs_access_lru_lock);
	}
3133
	nfs_access_cache_enforce_limit();
Linus Torvalds's avatar
Linus Torvalds committed
3134
}
3135 3136
EXPORT_SYMBOL_GPL(nfs_access_add_cache);

3137 3138 3139 3140 3141 3142
/*
 * Groups of NFS_ACCESS_* result bits that nfs_access_calc_mask() tests
 * when translating a server ACCESS result into MAY_* permission bits.
 */
#define NFS_MAY_READ		(NFS_ACCESS_READ)
#define NFS_MAY_WRITE		(NFS_ACCESS_MODIFY | \
				 NFS_ACCESS_EXTEND | \
				 NFS_ACCESS_DELETE)
#define NFS_FILE_MAY_WRITE	(NFS_ACCESS_MODIFY | \
				 NFS_ACCESS_EXTEND)
#define NFS_DIR_MAY_WRITE	NFS_MAY_WRITE
#define NFS_MAY_LOOKUP		(NFS_ACCESS_LOOKUP)
#define NFS_MAY_EXECUTE		(NFS_ACCESS_EXECUTE)
3146
static int
3147
nfs_access_calc_mask(u32 access_result, umode_t umode)
3148 3149 3150 3151 3152
{
	int mask = 0;

	if (access_result & NFS_MAY_READ)
		mask |= MAY_READ;
3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164
	if (S_ISDIR(umode)) {
		if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
			mask |= MAY_WRITE;
		if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
			mask |= MAY_EXEC;
	} else if (S_ISREG(umode)) {
		if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
			mask |= MAY_WRITE;
		if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
			mask |= MAY_EXEC;
	} else if (access_result & NFS_MAY_WRITE)
			mask |= MAY_WRITE;
3165 3166 3167
	return mask;
}

3168 3169
/*
 * Store the raw access result bits in @entry->mask.
 */
void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
{
	entry->mask = access_result;
}
EXPORT_SYMBOL_GPL(nfs_access_set_mask);
Linus Torvalds's avatar
Linus Torvalds committed
3173

3174
static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
Linus Torvalds's avatar
Linus Torvalds committed
3175 3176
{
	struct nfs_access_entry cache;
3177
	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
3178
	int cache_mask = -1;
Linus Torvalds's avatar
Linus Torvalds committed
3179 3180
	int status;

3181 3182
	trace_nfs_access_enter(inode);

3183
	status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
Linus Torvalds's avatar
Linus Torvalds committed
3184
	if (status == 0)
3185
		goto out_cached;
Linus Torvalds's avatar
Linus Torvalds committed
3186

3187
	status = -ECHILD;
3188
	if (!may_block)
3189 3190
		goto out;

3191 3192 3193
	/*
	 * Determine which access bits we want to ask for...
	 */
3194 3195
	cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND |
		     nfs_access_xattr_mask(NFS_SERVER(inode));
3196 3197 3198 3199
	if (S_ISDIR(inode->i_mode))
		cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
	else
		cache.mask |= NFS_ACCESS_EXECUTE;
3200
	status = NFS_PROTO(inode)->access(inode, &cache, cred);
3201 3202 3203
	if (status != 0) {
		if (status == -ESTALE) {
			if (!S_ISDIR(inode->i_mode))
3204 3205 3206
				nfs_set_inode_stale(inode);
			else
				nfs_zap_caches(inode);
3207
		}
3208
		goto out;
3209
	}
3210
	nfs_access_add_cache(inode, &cache, cred);
3211
out_cached:
3212
	cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
3213
	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
3214
		status = -EACCES;
Linus Torvalds's avatar
Linus Torvalds committed
3215
out:
3216
	trace_nfs_access_exit(inode, mask, cache_mask, status);
3217
	return status;
Linus Torvalds's avatar
Linus Torvalds committed
3218 3219
}

3220 3221 3222 3223
static int nfs_open_permission_mask(int openflags)
{
	int mask = 0;

3224 3225 3226 3227 3228 3229 3230 3231 3232 3233
	if (openflags & __FMODE_EXEC) {
		/* ONLY check exec rights */
		mask = MAY_EXEC;
	} else {
		if ((openflags & O_ACCMODE) != O_WRONLY)
			mask |= MAY_READ;
		if ((openflags & O_ACCMODE) != O_RDONLY)
			mask |= MAY_WRITE;
	}

3234 3235 3236
	return mask;
}

3237
/*
 * Check whether @cred may open @inode with @openflags, by running the
 * access check with the MAY_* bits derived from those flags.
 */
int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
{
	int wanted = nfs_open_permission_mask(openflags);

	return nfs_do_access(inode, cred, wanted);
}
EXPORT_SYMBOL_GPL(nfs_may_open);
3242

3243 3244 3245
static int nfs_execute_ok(struct inode *inode, int mask)
{
	struct nfs_server *server = NFS_SERVER(inode);
3246
	int ret = 0;
3247

3248 3249
	if (S_ISDIR(inode->i_mode))
		return 0;
3250
	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
3251 3252 3253 3254
		if (mask & MAY_NOT_BLOCK)
			return -ECHILD;
		ret = __nfs_revalidate_inode(server, inode);
	}
3255 3256 3257 3258 3259
	if (ret == 0 && !execute_ok(inode))
		ret = -EACCES;
	return ret;
}

3260 3261 3262
int nfs_permission(struct user_namespace *mnt_userns,
		   struct inode *inode,
		   int mask)
Linus Torvalds's avatar
Linus Torvalds committed
3263
{
3264
	const struct cred *cred = current_cred();
Linus Torvalds's avatar
Linus Torvalds committed
3265 3266
	int res = 0;

3267 3268
	nfs_inc_stats(inode, NFSIOS_VFSACCESS);

3269
	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
Linus Torvalds's avatar
Linus Torvalds committed
3270 3271
		goto out;
	/* Is this sys_access() ? */
Eric Paris's avatar
Eric Paris committed
3272
	if (mask & (MAY_ACCESS | MAY_CHDIR))
Linus Torvalds's avatar
Linus Torvalds committed
3273 3274 3275 3276 3277 3278
		goto force_lookup;

	switch (inode->i_mode & S_IFMT) {
		case S_IFLNK:
			goto out;
		case S_IFREG:
3279 3280 3281
			if ((mask & MAY_OPEN) &&
			   nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
				return 0;
Linus Torvalds's avatar
Linus Torvalds committed
3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295
			break;
		case S_IFDIR:
			/*
			 * Optimize away all write operations, since the server
			 * will check permissions when we perform the op.
			 */
			if ((mask & MAY_WRITE) && !(mask & MAY_READ))
				goto out;
	}

force_lookup:
	if (!NFS_PROTO(inode)->access)
		goto out_notsup;

3296
	res = nfs_do_access(inode, cred, mask);
Linus Torvalds's avatar
Linus Torvalds committed
3297
out:
3298 3299
	if (!res && (mask & MAY_EXEC))
		res = nfs_execute_ok(inode, mask);
3300

3301
	dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
Chuck Lever's avatar
Chuck Lever committed
3302
		inode->i_sb->s_id, inode->i_ino, mask, res);
Linus Torvalds's avatar
Linus Torvalds committed
3303 3304
	return res;
out_notsup:
3305 3306 3307
	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

3308 3309
	res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
						  NFS_INO_INVALID_OTHER);
Linus Torvalds's avatar
Linus Torvalds committed
3310
	if (res == 0)
3311
		res = generic_permission(&init_user_ns, inode, mask);
Chuck Lever's avatar
Chuck Lever committed
3312
	goto out;
Linus Torvalds's avatar
Linus Torvalds committed
3313
}
3314
EXPORT_SYMBOL_GPL(nfs_permission);