Commit a300bad2, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Make msync(MS_ASYNC) no longer start the I/O

MS_ASYNC will currently wait on previously-submitted I/O, then start new I/O
and not wait on it.  This can cause undesirable blocking if msync is called
rapidly against the same memory.

So instead, change msync(MS_ASYNC) to not start any IO at all.  Just flush
the pte dirty bits into the pageframe and leave it at that.

The IO _will_ happen within a kupdate period.  And the application can use
fsync() or fadvise(FADV_DONTNEED) if it actually wants to schedule the IO
immediately.

(This has triggered an ext3 bug - the page's buffers get dirtied so fast
that kjournald keeps writing the buffers over and over for 10-20 seconds
before deciding to give up for some reason)
parent 72c4f88a
...@@ -125,11 +125,13 @@ static int filemap_sync(struct vm_area_struct * vma, unsigned long address, ...@@ -125,11 +125,13 @@ static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
/* /*
* MS_SYNC syncs the entire file - including mappings. * MS_SYNC syncs the entire file - including mappings.
* *
* MS_ASYNC initiates writeout of just the dirty mapped data. * MS_ASYNC does not start I/O (it used to, up to 2.5.67). Instead, it just
* This provides no guarantee of file integrity - things like indirect * marks the relevant pages dirty. The application may now run fsync() to
* blocks may not have started writeout. MS_ASYNC is primarily useful * write out the dirty pages and wait on the writeout and check the result.
* where the application knows that it has finished with the data and * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
* wishes to intelligently schedule its own I/O traffic. * async writeout immediately.
* So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
* applications.
*/ */
static int msync_interval(struct vm_area_struct * vma, static int msync_interval(struct vm_area_struct * vma,
unsigned long start, unsigned long end, int flags) unsigned long start, unsigned long end, int flags)
...@@ -143,22 +145,20 @@ static int msync_interval(struct vm_area_struct * vma, ...@@ -143,22 +145,20 @@ static int msync_interval(struct vm_area_struct * vma,
if (file && (vma->vm_flags & VM_SHARED)) { if (file && (vma->vm_flags & VM_SHARED)) {
ret = filemap_sync(vma, start, end-start, flags); ret = filemap_sync(vma, start, end-start, flags);
if (!ret && (flags & (MS_SYNC|MS_ASYNC))) { if (!ret && (flags & MS_SYNC)) {
struct inode * inode = file->f_dentry->d_inode; struct inode *inode = file->f_dentry->d_inode;
int err; int err;
down(&inode->i_sem); down(&inode->i_sem);
ret = filemap_fdatawrite(inode->i_mapping); ret = filemap_fdatawrite(inode->i_mapping);
if (flags & MS_SYNC) {
if (file->f_op && file->f_op->fsync) { if (file->f_op && file->f_op->fsync) {
err = file->f_op->fsync(file, file->f_dentry, 1); err = file->f_op->fsync(file,file->f_dentry,1);
if (err && !ret) if (err && !ret)
ret = err; ret = err;
} }
err = filemap_fdatawait(inode->i_mapping); err = filemap_fdatawait(inode->i_mapping);
if (!ret) if (!ret)
ret = err; ret = err;
}
up(&inode->i_sem); up(&inode->i_sem);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment