Commit 616d8602 authored by Linus Torvalds

Linux 2.2.2-pre2

this one contains various small documentation updates and updates to xconfig,
but the important parts (and the smallest part of the actual patch) are:

 - shared file lockup fix by Stephen Tweedie
 - my fix for the TCP bug that Ingo found
 - Ingo's io-apic setup fixes, which should finally get rid of the
   spurious apic interrupts with some motherboards and the ExtINT setup.
 - inode leak thing
 - SMP scheduler potential race condition fix
 - sound driver updates
 - partition and disk fixes (2kB blocksize media and some IDE disk
   geometry and irq detection issues).

None of the fixes are critical to most people, but all of them _can_ be
critical to people who have seen problems in the area. As such, if
you're happy with 2.2.1 there is no pressing reason to test this patch
out, but I hope the pre-patches get enough testing that the final 2.2.2
can be left around for a while (CD-ROM manufacturers etc would certainly
prefer not to see lots of releases).

                Linus
parent da0f0135
......@@ -202,7 +202,7 @@ DO_ACTION( enable, 1, |= 0xff000000, ) /* destination = 0xff */
DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync()) /* mask = 1 */
DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */
static void __init clear_IO_APIC_pin(unsigned int pin)
static void clear_IO_APIC_pin(unsigned int pin)
{
struct IO_APIC_route_entry entry;
......@@ -215,6 +215,13 @@ static void __init clear_IO_APIC_pin(unsigned int pin)
io_apic_write(0x11 + 2 * pin, *(((int *)&entry) + 1));
}
static void clear_IO_APIC (void)
{
int pin;
for (pin = 0; pin < nr_ioapic_registers; pin++)
clear_IO_APIC_pin(pin);
}
/*
* support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
......@@ -625,7 +632,7 @@ void __init setup_IO_APIC_irqs(void)
/*
* Set up a certain pin as ExtINT delivered interrupt
*/
void __init setup_ExtINT_pin(unsigned int pin)
void __init setup_ExtINT_pin(unsigned int pin, int irq)
{
struct IO_APIC_route_entry entry;
......@@ -635,17 +642,16 @@ void __init setup_ExtINT_pin(unsigned int pin)
memset(&entry,0,sizeof(entry));
entry.delivery_mode = dest_ExtINT;
entry.dest_mode = 1; /* logical delivery */
entry.dest_mode = 0; /* physical delivery */
entry.mask = 0; /* unmask IRQ now */
/*
* Careful with this one. We do not use 'true' logical
* delivery, as we set local APICs to LDR == 0. But
* 0xff logical destination is special (broadcast).
* Any other combination will cause problems.
* We use physical delivery to get the timer IRQ
* to the boot CPU. 'boot_cpu_id' is the physical
* APIC ID of the boot CPU.
*/
entry.dest.logical.logical_dest = 0xff;
entry.dest.physical.physical_dest = boot_cpu_id;
entry.vector = 0; /* it's ignored */
entry.vector = assign_irq_vector(irq);
entry.polarity = 0;
entry.trigger = 0;
......@@ -760,7 +766,7 @@ void __init print_IO_APIC(void)
static void __init init_sym_mode(void)
{
int i, pin;
int i;
for (i = 0; i < PIN_MAP_SIZE; i++) {
irq_2_pin[i].pin = -1;
......@@ -790,8 +796,7 @@ static void __init init_sym_mode(void)
/*
* Do not trust the IO-APIC being empty at bootup
*/
for (pin = 0; pin < nr_ioapic_registers; pin++)
clear_IO_APIC_pin(pin);
clear_IO_APIC();
}
/*
......@@ -799,6 +804,15 @@ static void __init init_sym_mode(void)
*/
void init_pic_mode(void)
{
/*
* Clear the IO-APIC before rebooting:
*/
clear_IO_APIC();
/*
* Put it back into PIC mode (has an effect only on
* certain boards)
*/
printk("disabling symmetric IO mode... ");
outb_p(0x70, 0x22);
outb_p(0x00, 0x23);
......@@ -1184,7 +1198,7 @@ static inline void check_timer(void)
if (pin2 != -1) {
printk(".. (found pin %d) ...", pin2);
setup_ExtINT_pin(pin2);
setup_ExtINT_pin(pin2, 0);
make_8259A_irq(0);
}
......
......@@ -165,7 +165,7 @@ do_kdsk_ioctl(int cmd, struct kbentry *user_kbe, int perm, struct kbd_struct *kb
val = K_HOLE;
} else
val = (i ? K_HOLE : K_NOSUCHMAP);
return __put_user(val, &user_kbe->kb_value);
return put_user(val, &user_kbe->kb_value);
case KDSKBENT:
if (!perm)
return -EPERM;
......@@ -244,7 +244,7 @@ do_kbkeycode_ioctl(int cmd, struct kbkeycode *user_kbkc, int perm)
case KDGETKEYCODE:
kc = getkeycode(tmp.scancode);
if (kc >= 0)
kc = __put_user(kc, &user_kbkc->keycode);
kc = put_user(kc, &user_kbkc->keycode);
break;
case KDSETKEYCODE:
if (!perm)
......@@ -282,8 +282,8 @@ do_kdgkb_ioctl(int cmd, struct kbsentry *user_kdgkb, int perm)
p = func_table[i];
if(p)
for ( ; *p && sz; p++, sz--)
__put_user(*p, q++);
__put_user('\0', q);
put_user(*p, q++);
put_user('\0', q);
return ((p && *p) ? -EOVERFLOW : 0);
case KDSKBSENT:
if (!perm)
......@@ -603,12 +603,10 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
{
struct kbdiacrs *a = (struct kbdiacrs *)arg;
i = verify_area(VERIFY_WRITE, (void *) a, sizeof(struct kbdiacrs));
if (i)
return i;
__put_user(accent_table_size, &a->kb_cnt);
__copy_to_user(a->kbdiacr, accent_table,
accent_table_size*sizeof(struct kbdiacr));
if (put_user(accent_table_size, &a->kb_cnt))
return -EFAULT;
if (copy_to_user(a->kbdiacr, accent_table, accent_table_size*sizeof(struct kbdiacr)))
return -EFAULT;
return 0;
}
......@@ -619,14 +617,13 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
if (!perm)
return -EPERM;
i = verify_area(VERIFY_READ, (void *) a, sizeof(struct kbdiacrs));
if (i)
return i;
__get_user(ct,&a->kb_cnt);
if (get_user(ct,&a->kb_cnt))
return -EFAULT;
if (ct >= MAX_DIACR)
return -EINVAL;
accent_table_size = ct;
__copy_from_user(accent_table, a->kbdiacr, ct*sizeof(struct kbdiacr));
if (copy_from_user(accent_table, a->kbdiacr, ct*sizeof(struct kbdiacr)))
return -EFAULT;
return 0;
}
......@@ -717,12 +714,12 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
i = verify_area(VERIFY_WRITE,(void *)vtstat, sizeof(struct vt_stat));
if (i)
return i;
__put_user(fg_console + 1, &vtstat->v_active);
put_user(fg_console + 1, &vtstat->v_active);
state = 1; /* /dev/tty0 is always open */
for (i = 0, mask = 2; i < MAX_NR_CONSOLES && mask; ++i, mask <<= 1)
if (VT_IS_IN_USE(i))
state |= mask;
return __put_user(state, &vtstat->v_state);
return put_user(state, &vtstat->v_state);
}
/*
......@@ -856,8 +853,8 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
i = verify_area(VERIFY_READ, (void *)vtsizes, sizeof(struct vt_sizes));
if (i)
return i;
__get_user(ll, &vtsizes->v_rows);
__get_user(cc, &vtsizes->v_cols);
get_user(ll, &vtsizes->v_rows);
get_user(cc, &vtsizes->v_cols);
return vc_resize_all(ll, cc);
}
......@@ -870,12 +867,12 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
i = verify_area(VERIFY_READ, (void *)vtconsize, sizeof(struct vt_consize));
if (i)
return i;
__get_user(ll, &vtconsize->v_rows);
__get_user(cc, &vtconsize->v_cols);
__get_user(vlin, &vtconsize->v_vlin);
__get_user(clin, &vtconsize->v_clin);
__get_user(vcol, &vtconsize->v_vcol);
__get_user(ccol, &vtconsize->v_ccol);
get_user(ll, &vtconsize->v_rows);
get_user(cc, &vtconsize->v_cols);
get_user(vlin, &vtconsize->v_vlin);
get_user(clin, &vtconsize->v_clin);
get_user(vcol, &vtconsize->v_vcol);
get_user(ccol, &vtconsize->v_ccol);
vlin = vlin ? vlin : video_scan_lines;
if ( clin )
{
......
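
The vt/keyboard ioctl hunks above replace the unchecked __put_user()/__get_user()/__copy_*_user() helpers with the checking variants and propagate -EFAULT on failure. A minimal sketch of that pattern follows; the struct and handler names are hypothetical, only put_user() and copy_to_user() are real kernel interfaces:

#include <asm/uaccess.h>
#include <linux/errno.h>

struct kb_report {                      /* hypothetical user-visible struct */
        unsigned int cnt;
        unsigned char data[32];
};

/* Hypothetical handler: every user-space access is checked and a
 * failure is reported as -EFAULT instead of being silently dropped. */
static int example_report(struct kb_report *user_p,
                          const unsigned char *table, unsigned int n)
{
        if (put_user(n, &user_p->cnt))
                return -EFAULT;
        if (copy_to_user(user_p->data, table, n))
                return -EFAULT;
        return 0;
}

The checking variants validate the user pointer themselves, which is why the hunks above can return -EFAULT directly instead of relying only on the earlier verify_area() calls.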
......@@ -232,13 +232,15 @@ void clear_inode(struct inode *inode)
/*
* Dispose-list gets a local list, so it doesn't need to
* worry about list corruption.
* worry about list corruption. It releases the inode lock
* while clearing the inodes.
*/
static void dispose_list(struct list_head * head)
{
struct list_head *next;
int count = 0;
spin_unlock(&inode_lock);
next = head->next;
for (;;) {
struct list_head * tmp = next;
......@@ -256,7 +258,6 @@ static void dispose_list(struct list_head * head)
spin_lock(&inode_lock);
list_splice(head, &inode_unused);
inodes_stat.nr_free_inodes += count;
spin_unlock(&inode_lock);
}
/*
......@@ -305,52 +306,52 @@ int invalidate_inodes(struct super_block * sb)
spin_lock(&inode_lock);
busy = invalidate_list(&inode_in_use, sb, &throw_away);
busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
spin_unlock(&inode_lock);
dispose_list(&throw_away);
spin_unlock(&inode_lock);
return busy;
}
/*
* This is called with the inode lock held. It searches
* the in-use for the specified number of freeable inodes.
* Freeable inodes are moved to a temporary list and then
* placed on the unused list by dispose_list.
* the in-use for freeable inodes, which are moved to a
* temporary list and then placed on the unused list by
* dispose_list.
*
* We don't expect to have to call this very often.
*
* Note that we do not expect to have to search very hard:
* the freeable inodes will be at the old end of the list.
*
* N.B. The spinlock is released to call dispose_list.
* N.B. The spinlock is released during the call to
* dispose_list.
*/
#define CAN_UNUSE(inode) \
(((inode)->i_count == 0) && \
(!(inode)->i_state))
(((inode)->i_count | (inode)->i_state) == 0)
#define INODE(entry) (list_entry(entry, struct inode, i_list))
static int free_inodes(int goal)
static int free_inodes(void)
{
struct list_head *tmp, *head = &inode_in_use;
LIST_HEAD(freeable);
int found = 0, depth = goal << 1;
struct list_head list, *entry, *freeable = &list;
int found = 0;
while ((tmp = head->prev) != head && depth--) {
struct inode * inode = list_entry(tmp, struct inode, i_list);
INIT_LIST_HEAD(freeable);
entry = inode_in_use.next;
while (entry != &inode_in_use) {
struct list_head *tmp = entry;
entry = entry->next;
if (!CAN_UNUSE(INODE(tmp)))
continue;
list_del(tmp);
if (CAN_UNUSE(inode)) {
list_del(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_hash);
list_add(tmp, &freeable);
if (++found < goal)
continue;
break;
}
list_add(tmp, head);
list_del(&INODE(tmp)->i_hash);
INIT_LIST_HEAD(&INODE(tmp)->i_hash);
list_add(tmp, freeable);
found = 1;
}
if (found) {
spin_unlock(&inode_lock);
dispose_list(&freeable);
spin_lock(&inode_lock);
dispose_list(freeable);
found = 1; /* silly compiler */
}
return found;
}
......@@ -374,7 +375,7 @@ static void shrink_dentry_inodes(int goal)
static void try_to_free_inodes(int goal)
{
shrink_dentry_inodes(goal);
if (!free_inodes(goal))
if (!free_inodes())
shrink_dentry_inodes(goal);
}
......@@ -385,7 +386,7 @@ static void try_to_free_inodes(int goal)
void free_inode_memory(int goal)
{
spin_lock(&inode_lock);
free_inodes(goal);
free_inodes();
spin_unlock(&inode_lock);
}
......@@ -450,7 +451,7 @@ static struct inode * grow_inodes(void)
inodes_stat.preshrink = 1;
spin_lock(&inode_lock);
free_inodes(inodes_stat.nr_inodes >> 2);
free_inodes();
{
struct list_head *tmp = inode_unused.next;
if (tmp != &inode_unused) {
......
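
The inode hunks above move the unlock/relock of inode_lock into dispose_list() itself: callers now hold the lock across the call, the slow per-inode work runs unlocked, and the lock is re-taken before the cleared inodes are spliced back onto inode_unused. A simplified sketch of that hand-off, with an illustrative function name and the actual clearing elided:

#include <linux/spinlock.h>
#include <linux/list.h>

extern spinlock_t inode_lock;           /* fs/inode.c internal lock */

/* Sketch only: entered with inode_lock held, returns with it held.
 * The expensive per-inode work runs with the lock dropped. */
static void dispose_list_sketch(struct list_head *head)
{
        struct list_head *p;

        spin_unlock(&inode_lock);       /* never hold a spinlock over clear_inode() */

        for (p = head->next; p != head; p = p->next) {
                /* clear_inode(list_entry(p, struct inode, i_list)); */
        }

        spin_lock(&inode_lock);         /* re-take before touching the shared lists */
        /* list_splice(head, &inode_unused); bump inodes_stat.nr_free_inodes; */
}

Keeping both lock transitions inside dispose_list() means invalidate_inodes() and free_inodes() follow the same locking discipline instead of each dropping and re-taking the lock around the call.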
......@@ -174,6 +174,8 @@ struct mm_struct {
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
unsigned long cpu_vm_mask;
unsigned long swap_cnt; /* number of pages to swap on next pass */
unsigned long swap_address;
/*
* This is an architecture-specific pointer: the portable
* part of Linux does not know about any segments.
......@@ -191,7 +193,7 @@ struct mm_struct {
0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, \
0, 0, NULL }
0, 0, 0, 0, NULL }
struct signal_struct {
atomic_t count;
......@@ -276,8 +278,6 @@ struct task_struct {
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
int swappable:1;
unsigned long swap_address;
unsigned long swap_cnt; /* number of pages to swap on next pass */
/* process credentials */
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
......@@ -361,7 +361,7 @@ struct task_struct {
/* utime */ {0,0,0,0},0, \
/* per CPU times */ {0, }, {0, }, \
/* flt */ 0,0,0,0,0,0, \
/* swp */ 0,0,0, \
/* swp */ 0, \
/* process credentials */ \
/* uid etc */ 0,0,0,0,0,0,0,0, \
/* suppl grps*/ 0, {0,}, \
......
......@@ -64,6 +64,7 @@ extern int console_loglevel;
static int init(void *);
extern int bdflush(void *);
extern int kswapd(void *);
extern int kpiod(void *);
extern void kswapd_setup(void);
extern void init_IRQ(void);
......@@ -1271,6 +1272,7 @@ static void __init do_basic_setup(void)
kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
/* Start the background pageout daemon. */
kswapd_setup();
kernel_thread(kpiod, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
#if CONFIG_AP1000
......
......@@ -107,6 +107,7 @@ EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(update_vm_cache);
EXPORT_SYMBOL(vmtruncate);
EXPORT_SYMBOL(find_vma);
EXPORT_SYMBOL(get_unmapped_area);
/* filesystem internal functions */
EXPORT_SYMBOL(in_group_p);
......
......@@ -680,8 +680,18 @@ asmlinkage void schedule(void)
sched_data->prevstate = prev->state;
/* this is the scheduler proper: */
{
struct task_struct * p = init_task.next_run;
int c = -1000;
/* Default process to select.. */
next = idle_task;
if (prev->state == TASK_RUNNING) {
c = goodness(prev, prev, this_cpu);
next = prev;
}
/*
* This is subtle.
* Note how we can enable interrupts here, even
......@@ -693,36 +703,27 @@ asmlinkage void schedule(void)
* the scheduler lock
*/
spin_unlock_irq(&runqueue_lock);
#ifdef __SMP__
prev->has_cpu = 0;
#endif
/*
* Note! there may appear new tasks on the run-queue during this, as
* interrupts are enabled. However, they will be put on front of the
* list, so our list starting at "p" is essentially fixed.
*/
/* this is the scheduler proper: */
{
int c = -1000;
next = idle_task;
while (p != &init_task) {
if (can_schedule(p)) {
int weight = goodness(p, prev, this_cpu);
if (weight > c)
c = weight, next = p;
}
p = p->next_run;
while (p != &init_task) {
if (can_schedule(p)) {
int weight = goodness(p, prev, this_cpu);
if (weight > c)
c = weight, next = p;
}
p = p->next_run;
}
/* Do we need to re-calculate counters? */
if (!c) {
struct task_struct *p;
read_lock(&tasklist_lock);
for_each_task(p)
p->counter = (p->counter >> 1) + p->priority;
read_unlock(&tasklist_lock);
}
/* Do we need to re-calculate counters? */
if (!c) {
struct task_struct *p;
read_lock(&tasklist_lock);
for_each_task(p)
p->counter = (p->counter >> 1) + p->priority;
read_unlock(&tasklist_lock);
}
}
......@@ -751,10 +752,8 @@ asmlinkage void schedule(void)
* thus we have to lock the previous process from getting
* rescheduled during switch_to().
*/
prev->has_cpu = 1;
next->has_cpu = 1;
next->processor = this_cpu;
next->has_cpu = 1;
spin_unlock(&scheduler_lock);
#endif /* __SMP__ */
if (prev != next) {
......
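
The scheduler hunk above makes a still-runnable prev the default candidate, with its goodness computed before the run-queue scan, and sets next->has_cpu before scheduler_lock is dropped; this appears to be the SMP scheduler race fix mentioned in the commit message. Stripped of locking and the counter recalculation, the selection step reduces to roughly the sketch below; pick_next and the run_queue array are illustrative stand-ins for the real next_run list walk:

/* Illustrative sketch of the selection logic only. */
struct task;                            /* stand-in for struct task_struct */
extern int goodness(struct task *p, struct task *prev, int this_cpu);
extern int can_schedule(struct task *p);

static struct task *pick_next(struct task *prev, int prev_runnable,
                              struct task **run_queue, int n,
                              struct task *idle_task, int this_cpu)
{
        struct task *next = idle_task;  /* fall back to the idle task */
        int c = -1000, i;

        /* New in this patch: a runnable prev is the default choice. */
        if (prev_runnable) {
                c = goodness(prev, prev, this_cpu);
                next = prev;
        }

        for (i = 0; i < n; i++) {       /* scan the run queue */
                struct task *p = run_queue[i];
                if (can_schedule(p)) {
                        int weight = goodness(p, prev, this_cpu);
                        if (weight > c) {
                                c = weight;
                                next = p;
                        }
                }
        }
        return next;                    /* c == 0 afterwards triggers a counter recalc */
}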
......@@ -19,6 +19,7 @@
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/swapctl.h>
#include <linux/slab.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
......@@ -39,6 +40,26 @@ struct page * page_hash_table[PAGE_HASH_SIZE];
#define release_page(page) __free_page((page))
/*
* Define a request structure for outstanding page write requests
* to the background page io daemon
*/
struct pio_request
{
struct pio_request * next;
struct file * file;
unsigned long offset;
unsigned long page;
};
static struct pio_request *pio_first = NULL, **pio_last = &pio_first;
static kmem_cache_t *pio_request_cache;
static struct wait_queue *pio_wait = NULL;
static inline void
make_pio_request(struct file *, unsigned long, unsigned long);
/*
* Invalidate the pages of an inode, removing all pages that aren't
* locked down (those are sure to be up-to-date anyway, so we shouldn't
......@@ -1079,8 +1100,9 @@ static inline int do_write_page(struct inode * inode, struct file * file,
}
static int filemap_write_page(struct vm_area_struct * vma,
unsigned long offset,
unsigned long page)
unsigned long offset,
unsigned long page,
int wait)
{
int result;
struct file * file;
......@@ -1098,6 +1120,17 @@ static int filemap_write_page(struct vm_area_struct * vma,
* and file could be released ... increment the count to be safe.
*/
file->f_count++;
/*
* If this is a swapping operation rather than msync(), then
* leave the actual IO, and the restoration of the file count,
* to the kpiod thread. Just queue the request for now.
*/
if (!wait) {
make_pio_request(file, offset, page);
return 0;
}
down(&inode->i_sem);
result = do_write_page(inode, file, (const char *) page, offset);
up(&inode->i_sem);
......@@ -1113,7 +1146,7 @@ static int filemap_write_page(struct vm_area_struct * vma,
*/
int filemap_swapout(struct vm_area_struct * vma, struct page * page)
{
return filemap_write_page(vma, page->offset, page_address(page));
return filemap_write_page(vma, page->offset, page_address(page), 0);
}
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
......@@ -1150,7 +1183,7 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
return 0;
}
}
error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page, 1);
free_page(page);
return error;
}
......@@ -1569,3 +1602,120 @@ void put_cached_page(unsigned long addr)
wake_up(&page->wait);
__free_page(page);
}
/* Add request for page IO to the queue */
static inline void put_pio_request(struct pio_request *p)
{
*pio_last = p;
p->next = NULL;
pio_last = &p->next;
}
/* Take the first page IO request off the queue */
static inline struct pio_request * get_pio_request(void)
{
struct pio_request * p = pio_first;
pio_first = p->next;
if (!pio_first)
pio_last = &pio_first;
return p;
}
/* Make a new page IO request and queue it to the kpiod thread */
static inline void make_pio_request(struct file *file,
unsigned long offset,
unsigned long page)
{
struct pio_request *p;
atomic_inc(&mem_map[MAP_NR(page)].count);
/*
* We need to allocate without causing any recursive IO in the
* current thread's context. We might currently be swapping out
* as a result of an allocation made while holding a critical
* filesystem lock. To avoid deadlock, we *MUST* not reenter
* the filesystem in this thread.
*
* We can wait for kswapd to free memory, or we can try to free
* pages without actually performing further IO, without fear of
* deadlock. --sct
*/
while ((p = kmem_cache_alloc(pio_request_cache, GFP_BUFFER)) == NULL) {
if (try_to_free_pages(__GFP_WAIT))
continue;
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ/10);
}
p->file = file;
p->offset = offset;
p->page = page;
put_pio_request(p);
wake_up(&pio_wait);
}
/*
* This is the only thread which is allowed to write out filemap pages
* while swapping.
*
* To avoid deadlock, it is important that we never reenter this thread.
* Although recursive memory allocations within this thread may result
* in more page swapping, that swapping will always be done by queuing
* another IO request to the same thread: we will never actually start
* that IO request until we have finished with the current one, and so
* we will not deadlock.
*/
int kpiod(void * unused)
{
struct wait_queue wait = {current};
struct inode * inode;
struct dentry * dentry;
struct pio_request * p;
current->session = 1;
current->pgrp = 1;
strcpy(current->comm, "kpiod");
sigfillset(&current->blocked);
init_waitqueue(&pio_wait);
lock_kernel();
pio_request_cache = kmem_cache_create("pio_request",
sizeof(struct pio_request),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (!pio_request_cache)
panic ("Could not create pio_request slab cache");
while (1) {
current->state = TASK_INTERRUPTIBLE;
add_wait_queue(&pio_wait, &wait);
while (!pio_first)
schedule();
remove_wait_queue(&pio_wait, &wait);
current->state = TASK_RUNNING;
while (pio_first) {
p = get_pio_request();
dentry = p->file->f_dentry;
inode = dentry->d_inode;
down(&inode->i_sem);
do_write_page(inode, p->file,
(const char *) p->page, p->offset);
up(&inode->i_sem);
fput(p->file);
free_page(p->page);
kmem_cache_free(pio_request_cache, p);
}
}
}
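
The pio_first/pio_last pair above implements a singly linked FIFO whose tail pointer points at the last next field: make_pio_request() appends at the tail and kpiod consumes from the head. A small stand-alone model of just that queue discipline (hypothetical names, no locking shown):

#include <stdio.h>
#include <stdlib.h>

struct req {                            /* stand-in for struct pio_request */
        struct req *next;
        int payload;
};

static struct req *first = NULL, **last = &first;

static void put_req(struct req *p)      /* enqueue at the tail */
{
        *last = p;
        p->next = NULL;
        last = &p->next;
}

static struct req *get_req(void)        /* dequeue from the head */
{
        struct req *p = first;

        first = p->next;
        if (!first)
                last = &first;          /* queue is empty again: reset the tail */
        return p;
}

int main(void)
{
        int i;

        for (i = 0; i < 3; i++) {
                struct req *p = malloc(sizeof(*p));
                p->payload = i;
                put_req(p);
        }
        while (first) {
                struct req *p = get_req();
                printf("dequeued %d\n", p->payload);
                free(p);
        }
        return 0;
}

In the kernel version the nodes come from the pio_request slab cache, and make_pio_request() allocates with GFP_BUFFER and falls back to try_to_free_pages() rather than re-entering the filesystem, as the comment above explains.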
......@@ -202,7 +202,7 @@ static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct *
do {
int result;
tsk->swap_address = address + PAGE_SIZE;
tsk->mm->swap_address = address + PAGE_SIZE;
result = try_to_swap_out(tsk, vma, address, pte, gfp_mask);
if (result)
return result;
......@@ -274,7 +274,7 @@ static int swap_out_process(struct task_struct * p, int gfp_mask)
/*
* Go through process' page directory.
*/
address = p->swap_address;
address = p->mm->swap_address;
/*
* Find the proper vm-area
......@@ -296,8 +296,8 @@ static int swap_out_process(struct task_struct * p, int gfp_mask)
}
/* We didn't find anything for the process */
p->swap_cnt = 0;
p->swap_address = 0;
p->mm->swap_cnt = 0;
p->mm->swap_address = 0;
return 0;
}
......@@ -345,9 +345,9 @@ static int swap_out(unsigned int priority, int gfp_mask)
continue;
/* Refresh swap_cnt? */
if (assign)
p->swap_cnt = p->mm->rss;
if (p->swap_cnt > max_cnt) {
max_cnt = p->swap_cnt;
p->mm->swap_cnt = p->mm->rss;
if (p->mm->swap_cnt > max_cnt) {
max_cnt = p->mm->swap_cnt;
pbest = p;
}
}
......
......@@ -184,6 +184,8 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of
for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req;
i++, req = req->dl_next) {
if (req->sk)
continue;
pos += 128;
if (pos < offset)
continue;
......
......@@ -1563,12 +1563,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
}
#endif /* CONFIG_FILTER */
/*
* socket locking is here for SMP purposes as backlog rcv
* is currently called with bh processing disabled.
*/
lock_sock(sk);
/*
* This doesn't check if the socket has enough room for the packet.
* Either process the packet _without_ queueing it and then free it,
......@@ -1579,7 +1573,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->state == TCP_ESTABLISHED) { /* Fast path */
if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
goto reset;
release_sock(sk);
return 0;
}
......@@ -1590,14 +1583,21 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
nsk = tcp_v4_hnd_req(sk, skb);
if (!nsk)
goto discard;
lock_sock(nsk);
release_sock(sk);
/*
* Queue it on the new socket if the new socket is active,
* otherwise we just shortcircuit this and continue with
* the new socket..
*/
if (atomic_read(&nsk->sock_readers)) {
__skb_queue_tail(&nsk->back_log, skb);
return 0;
}
sk = nsk;
}
if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
goto reset;
release_sock(sk);
return 0;
reset:
......@@ -1609,7 +1609,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
* might be destroyed here. This current version compiles correctly,
* but you have been warned.
*/
release_sock(sk);
return 0;
}
......