Commit 351ae16b authored by Linus Torvalds

Import 2.2.2pre5

parent 724170c9
@@ -241,7 +241,7 @@ CONFIG_EEXPRESS_PRO100=y
 # CONFIG_ISDN is not set
 #
-# CD-ROM drivers (not for SCSI or IDE/ATAPI drives)
+# Old CD-ROM drivers (not SCSI, not IDE)
 #
 # CONFIG_CD_NO_IDESCSI is not set
...
@@ -43,6 +43,7 @@ EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL_NOVERS(__down_failed);
 EXPORT_SYMBOL_NOVERS(__down_failed_interruptible);
+EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
 EXPORT_SYMBOL_NOVERS(__up_wakeup);
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_partial_copy);
...
@@ -569,6 +569,9 @@ static int __init assign_irq_vector(int irq)
        printk("WARNING: ASSIGN_IRQ_VECTOR wrapped back to %02X\n",
               current_vector);
    }
+   if (current_vector == SYSCALL_VECTOR)
+       panic("ran out of interrupt sources!");
+
    IO_APIC_VECTOR(irq) = current_vector;
    return current_vector;
 }
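The new guard turns vector exhaustion into a hard panic instead of silently handing a device the system-call gate. A minimal user-space sketch of the rule being protected; the step-by-8 rotation and the wrap offset are assumptions for illustration, not the exact 2.2 allocator:

    #include <stdio.h>
    #include <stdlib.h>

    #define FIRST_EXTERNAL_VECTOR 0x20
    #define SYSCALL_VECTOR        0x80

    static int current_vector = FIRST_EXTERNAL_VECTOR + 8;

    static int assign_vector(void)
    {
        current_vector += 8;              /* spread across priority classes */
        if (current_vector > 0xfe) {      /* illustrative wrap rule */
            current_vector = FIRST_EXTERNAL_VECTOR + 9;
            printf("WARNING: wrapped back to %02X\n", current_vector);
        }
        if (current_vector == SYSCALL_VECTOR) {
            /* 0x28 + 8k eventually lands on 0x80: refuse to reuse int 0x80 */
            fprintf(stderr, "ran out of interrupt sources!\n");
            exit(1);
        }
        return current_vector;
    }

    int main(void)
    {
        for (int i = 0; i < 12; i++)      /* the 11th allocation hits 0x80 */
            printf("vector %02X\n", assign_vector());
        return 0;
    }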
@@ -693,9 +696,11 @@ void __init print_IO_APIC(void)
    printk(".... register #01: %08X\n", *(int *)&reg_01);
    printk(".......     : max redirection entries: %04X\n", reg_01.entries);
-   if (    (reg_01.entries != 0x0f) && /* ISA-only Neptune boards */
-       (reg_01.entries != 0x17) && /* ISA+PCI boards */
-       (reg_01.entries != 0x3F)    /* Xeon boards */
+   if (    (reg_01.entries != 0x0f) && /* older (Neptune) boards */
+       (reg_01.entries != 0x17) && /* typical ISA+PCI boards */
+       (reg_01.entries != 0x1b) && /* Compaq Proliant boards */
+       (reg_01.entries != 0x1f) && /* dual Xeon boards */
+       (reg_01.entries != 0x3F)    /* bigger Xeon boards */
    )
        UNEXPECTED_IO_APIC();
    if (reg_01.entries == 0x0f)
@@ -1163,7 +1168,7 @@ static inline void init_IO_APIC_traps(void)
     * 0x80, because int 0x80 is hm, kind of importantish. ;)
     */
    for (i = 0; i < NR_IRQS ; i++) {
-       if (IO_APIC_IRQ(i)) {
+       if (IO_APIC_VECTOR(i) > 0) {
            if (IO_APIC_irq_trigger(i))
                irq_desc[i].handler = &ioapic_level_irq_type;
            else
@@ -1173,8 +1178,15 @@ static inline void init_IO_APIC_traps(void)
             */
            if (i < 16)
                disable_8259A_irq(i);
-       }
+       } else
+           /*
+            * we have no business changing low ISA
+            * IRQs.
+            */
+           if (IO_APIC_IRQ(i))
+               irq_desc[i].handler = &no_irq_type;
    }
+   init_IRQ_SMP();
 }
 
 /*
@@ -1278,14 +1290,12 @@ void __init setup_IO_APIC(void)
        construct_default_ISA_mptable();
    }
 
-   init_IO_APIC_traps();
-
    /*
     * Set up the IO-APIC IRQ routing table by parsing the MP-BIOS
     * mptable:
     */
    setup_IO_APIC_irqs();
-   init_IRQ_SMP();
+   init_IO_APIC_traps();
    check_timer();
    print_IO_APIC();
...
@@ -70,11 +70,34 @@ atomic_t nmi_counter;
  */
 spinlock_t irq_controller_lock;
 
 /*
  * Dummy controller type for unused interrupts
  */
-static void do_none(unsigned int irq, struct pt_regs * regs) { }
+static void do_none(unsigned int irq, struct pt_regs * regs)
+{
+   /*
+    * we are careful. While for ISA irqs it's common to happen
+    * outside of any driver (think autodetection), this is not
+    * at all nice for PCI interrupts. So we are stricter and
+    * print a warning when such spurious interrupts happen.
+    * Spurious interrupts can confuse other drivers if the PCI
+    * IRQ line is shared.
+    *
+    * Such spurious interrupts are either driver bugs, or
+    * sometimes hw (chipset) bugs.
+    */
+   printk("unexpected IRQ vector %d on CPU#%d!\n",irq, smp_processor_id());
+
+#ifdef __SMP__
+   /*
+    * [currently unexpected vectors happen only on SMP and APIC.
+    *  if we want to have non-APIC and non-8259A controllers
+    *  in the future with unexpected vectors, this ack should
+    *  probably be made controller-specific.]
+    */
+   ack_APIC_irq();
+#endif
+}
 static void enable_none(unsigned int irq) { }
 static void disable_none(unsigned int irq) { }
@@ -82,7 +105,7 @@ static void disable_none(unsigned int irq) { }
 #define startup_none   enable_none
 #define shutdown_none  disable_none
 
-static struct hw_interrupt_type no_irq_type = {
+struct hw_interrupt_type no_irq_type = {
    "none",
    startup_none,
    shutdown_none,
@@ -141,10 +164,10 @@ static unsigned int cached_irq_mask = 0xffff;
  * fed to the CPU IRQ line directly.
  *
  * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs us one more branch in do_IRQ,
- * but we have _much_ higher compatibility and robustness this way.
+ * this 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
  */
-unsigned long long io_apic_irqs = 0;
+unsigned long io_apic_irqs = 0;
 
 /*
  * These have to be protected by the irq controller spinlock
@@ -254,32 +277,43 @@ static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs)
 BUILD_COMMON_IRQ()
+
+#define BI(x,y) \
+   BUILD_IRQ(##x##y)
+
+#define BUILD_16_IRQS(x) \
+   BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+   BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+   BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+   BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+
 /*
  * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x20-0x30)
  */
-BUILD_IRQ(0) BUILD_IRQ(1) BUILD_IRQ(2) BUILD_IRQ(3)
-BUILD_IRQ(4) BUILD_IRQ(5) BUILD_IRQ(6) BUILD_IRQ(7)
-BUILD_IRQ(8) BUILD_IRQ(9) BUILD_IRQ(10) BUILD_IRQ(11)
-BUILD_IRQ(12) BUILD_IRQ(13) BUILD_IRQ(14) BUILD_IRQ(15)
+BUILD_16_IRQS(0x0)
 #ifdef CONFIG_X86_IO_APIC
 /*
- * The IO-APIC gives us many more interrupt sources..
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
  */
-BUILD_IRQ(16) BUILD_IRQ(17) BUILD_IRQ(18) BUILD_IRQ(19)
-BUILD_IRQ(20) BUILD_IRQ(21) BUILD_IRQ(22) BUILD_IRQ(23)
-BUILD_IRQ(24) BUILD_IRQ(25) BUILD_IRQ(26) BUILD_IRQ(27)
-BUILD_IRQ(28) BUILD_IRQ(29) BUILD_IRQ(30) BUILD_IRQ(31)
-BUILD_IRQ(32) BUILD_IRQ(33) BUILD_IRQ(34) BUILD_IRQ(35)
-BUILD_IRQ(36) BUILD_IRQ(37) BUILD_IRQ(38) BUILD_IRQ(39)
-BUILD_IRQ(40) BUILD_IRQ(41) BUILD_IRQ(42) BUILD_IRQ(43)
-BUILD_IRQ(44) BUILD_IRQ(45) BUILD_IRQ(46) BUILD_IRQ(47)
-BUILD_IRQ(48) BUILD_IRQ(49) BUILD_IRQ(50) BUILD_IRQ(51)
-BUILD_IRQ(52) BUILD_IRQ(53) BUILD_IRQ(54) BUILD_IRQ(55)
-BUILD_IRQ(56) BUILD_IRQ(57) BUILD_IRQ(58) BUILD_IRQ(59)
-BUILD_IRQ(60) BUILD_IRQ(61) BUILD_IRQ(62) BUILD_IRQ(63)
+BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
+BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
+BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
 #endif
+
+#undef BUILD_16_IRQS
+#undef BI
 #ifdef __SMP__
 /*
  * The following vectors are part of the Linux architecture, there
@@ -303,37 +337,35 @@ BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt)
 #endif
+#define IRQ(x,y) \
+   IRQ##x##y##_interrupt
+
+#define IRQLIST_16(x) \
+   IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+   IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+   IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+   IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+
 static void (*interrupt[NR_IRQS])(void) = {
-   IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt,
-   IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
-   IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt,
-   IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
+   IRQLIST_16(0x0),
 #ifdef CONFIG_X86_IO_APIC
-   ,IRQ16_interrupt, IRQ17_interrupt, IRQ18_interrupt, IRQ19_interrupt,
-   IRQ20_interrupt, IRQ21_interrupt, IRQ22_interrupt, IRQ23_interrupt,
-   IRQ24_interrupt, IRQ25_interrupt, IRQ26_interrupt, IRQ27_interrupt,
-   IRQ28_interrupt, IRQ29_interrupt,
-   IRQ30_interrupt, IRQ31_interrupt, IRQ32_interrupt, IRQ33_interrupt,
-   IRQ34_interrupt, IRQ35_interrupt, IRQ36_interrupt, IRQ37_interrupt,
-   IRQ38_interrupt, IRQ39_interrupt,
-   IRQ40_interrupt, IRQ41_interrupt, IRQ42_interrupt, IRQ43_interrupt,
-   IRQ44_interrupt, IRQ45_interrupt, IRQ46_interrupt, IRQ47_interrupt,
-   IRQ48_interrupt, IRQ49_interrupt,
-   IRQ50_interrupt, IRQ51_interrupt, IRQ52_interrupt, IRQ53_interrupt,
-   IRQ54_interrupt, IRQ55_interrupt, IRQ56_interrupt, IRQ57_interrupt,
-   IRQ58_interrupt, IRQ59_interrupt,
-   IRQ60_interrupt, IRQ61_interrupt, IRQ62_interrupt, IRQ63_interrupt
+   IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+   IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+   IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+   IRQLIST_16(0xc), IRQLIST_16(0xd)
 #endif
 };
+
+#undef IRQ
+#undef IRQLIST_16
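The BUILD_16_IRQS/IRQLIST_16 pairs replace 64 hand-written stub names with two token-pasting macros: a hex "row" digit and a "column" digit are glued into one identifier, so each invocation emits sixteen entries. A standalone sketch of the trick with hypothetical names; note the diff's BUILD_IRQ(##x##y) spelling relied on the older GNU preprocessor, where standard C wants the paste done in a helper as below:

    #include <stdio.h>

    /* DO(x,y) glues a hex row and column into one identifier, so one
     * DO_16(x) line defines 16 functions. */
    #define DECL_IRQ(nr)  void irq_##nr##_interrupt(void) { puts(#nr); }
    #define DO(x,y)       DECL_IRQ(x##y)
    #define DO_16(x) \
        DO(x,0) DO(x,1) DO(x,2) DO(x,3) DO(x,4) DO(x,5) DO(x,6) DO(x,7) \
        DO(x,8) DO(x,9) DO(x,a) DO(x,b) DO(x,c) DO(x,d) DO(x,e) DO(x,f)

    DO_16(0x0)   /* defines irq_0x00_interrupt .. irq_0x0f_interrupt */

    int main(void)
    {
        irq_0x00_interrupt();
        irq_0x0f_interrupt();
        return 0;
    }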
 
 /*
- * Initial irq handlers.
+ * Special irq handlers.
  */
-void no_action(int cpl, void *dev_id, struct pt_regs *regs)
-{
-}
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
 
 #ifndef CONFIG_VISWS
 /*
@@ -770,7 +802,7 @@ asmlinkage void do_IRQ(struct pt_regs regs)
     * 0 return value means that this irq is already being
     * handled by some other CPU. (or is disabled)
     */
-   unsigned int irq = regs.orig_eax & 0xff;
+   int irq = regs.orig_eax & 0xff; /* subtle, see irq.h */
    int cpu = smp_processor_id();
 
    kstat.irqs[cpu][irq]++;
@@ -986,42 +1018,6 @@ int probe_irq_off(unsigned long unused)
    return irq_found;
 }
-/*
- * Silly, horrible hack
- */
-static char uglybuffer[10*256];
-
-__asm__("\n" __ALIGN_STR"\n"
-   "common_unexpected:\n\t"
-   SAVE_ALL
-   "pushl $ret_from_intr\n\t"
-   "jmp strange_interrupt");
-
-void strange_interrupt(int irqnum)
-{
-   printk("Unexpected interrupt %d\n", irqnum & 255);
-   for (;;);
-}
-
-extern int common_unexpected;
-__initfunc(void init_unexpected_irq(void))
-{
-   int i;
-
-   for (i = 0; i < 256; i++) {
-       char *code = uglybuffer + 10*i;
-       unsigned long jumpto = (unsigned long) &common_unexpected;
-
-       jumpto -= (unsigned long)(code+10);
-       code[0] = 0x68;     /* pushl */
-       *(int *)(code+1) = i - 512;
-       code[5] = 0xe9;     /* jmp */
-       *(int *)(code+6) = jumpto;
-       set_intr_gate(i,code);
-   }
-}
 void init_ISA_irqs (void)
 {
    int i;
@@ -1033,7 +1029,7 @@ void init_ISA_irqs (void)
        if (i < 16) {
            /*
-            * 16 old-style INTA-cycle interrupt gates:
+            * 16 old-style INTA-cycle interrupts:
             */
            irq_desc[i].handler = &i8259A_irq_type;
        } else {
@@ -1054,9 +1050,16 @@ __initfunc(void init_IRQ(void))
 #else
    init_VISWS_APIC_irqs();
 #endif
 
-   for (i = 0; i < 16; i++)
-       set_intr_gate(0x20+i,interrupt[i]);
+   /*
+    * Cover the whole vector space, no vector can escape
+    * us. (some of these will be overridden and become
+    * 'special' SMP interrupts)
+    */
+   for (i = 0; i < NR_IRQS; i++) {
+       int vector = FIRST_EXTERNAL_VECTOR + i;
+       if (vector != SYSCALL_VECTOR)
+           set_intr_gate(vector, interrupt[i]);
+   }
 
 #ifdef __SMP__
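With this loop, IRQ i always sits at vector FIRST_EXTERNAL_VECTOR + i, so the two numberings differ by a constant 0x20 and only vector 0x80 is skipped. A tiny self-checking sketch of the resulting mapping, using the constants from the irq.h hunk further down:

    #include <assert.h>

    #define FIRST_EXTERNAL_VECTOR 0x20
    #define SYSCALL_VECTOR        0x80
    #define NR_IRQS               224

    static int irq_to_vector(int irq) { return FIRST_EXTERNAL_VECTOR + irq; }
    static int vector_to_irq(int vec) { return vec - FIRST_EXTERNAL_VECTOR; }

    int main(void)
    {
        for (int i = 0; i < NR_IRQS; i++) {
            int v = irq_to_vector(i);
            assert(v >= 0x20 && v <= 0xff);  /* whole usable IDT range */
            assert(vector_to_irq(v) == i);   /* round-trips exactly */
        }
        /* IRQ 0x60 would map to vector 0x80, so it gets no gate of its own */
        return 0;
    }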
@@ -1067,13 +1070,9 @@ __initfunc(void init_IRQ(void))
    set_intr_gate(IRQ0_TRAP_VECTOR, interrupt[0]);
 
    /*
-    * The reschedule interrupt slowly changes it's functionality,
-    * while so far it was a kind of broadcasted timer interrupt,
-    * in the future it should become a CPU-to-CPU rescheduling IPI,
-    * driven by schedule() ?
+    * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+    * IPI, driven by wakeup.
     */
-
-   /* IPI for rescheduling */
    set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
    /* IPI for invalidation */
...
@@ -16,6 +16,7 @@ struct hw_interrupt_type {
    void (*disable)(unsigned int irq);
 };
 
+extern struct hw_interrupt_type no_irq_type;
 
 /*
  * IRQ line status.
@@ -40,6 +41,18 @@ typedef struct {
    unsigned int depth;     /* Disable depth for nested irq disables */
 } irq_desc_t;
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+#define FIRST_EXTERNAL_VECTOR  0x20
+
+#define SYSCALL_VECTOR     0x80
+
+/*
+ * Vectors 0x20-0x2f are used for ISA interrupts.
+ */
 
 /*
  * Special IRQ vectors used by the SMP architecture:
  *
@@ -54,7 +67,7 @@ typedef struct {
 #define MTRR_CHANGE_VECTOR 0x50
 
 /*
- * First vector available to drivers: (vectors 0x51-0xfe)
+ * First APIC vector available to drivers: (vectors 0x51-0xfe)
  */
 #define IRQ0_TRAP_VECTOR   0x51
@@ -94,7 +107,9 @@ extern void send_IPI(int dest, int vector);
 extern void init_pic_mode(void);
 extern void print_IO_APIC(void);
-extern unsigned long long io_apic_irqs;
+extern unsigned long io_apic_irqs;
+
+extern char _stext, _etext;
 
 #define MAX_IRQ_SOURCES 128
 #define MAX_MP_BUSSES 32
@@ -126,7 +141,7 @@ static inline void irq_exit(int cpu, unsigned int irq)
    hardirq_exit(cpu);
 }
 
-#define IO_APIC_IRQ(x) ((1<<x) & io_apic_irqs)
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
 
 #else
@@ -201,6 +216,13 @@ __asm__( \
    "pushl $ret_from_intr\n\t" \
    "jmp "SYMBOL_NAME_STR(do_IRQ));
 
+/*
+ * subtle. orig_eax is used by the signal code to distinct between
+ * system calls and interrupted 'random user-space'. Thus we have
+ * to put a negative value into orig_eax here. (the problem is that
+ * both system calls and IRQs want to have small integer numbers in
+ * orig_eax, and the syscall code has won the optimization conflict ;)
+ */
 #define BUILD_IRQ(nr) \
 asmlinkage void IRQ_NAME(nr); \
 __asm__( \
@@ -216,7 +238,6 @@ SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
 static inline void x86_do_profile (unsigned long eip)
 {
    if (prof_buffer && current->pid) {
-       extern int _stext;
        eip -= (unsigned long) &_stext;
        eip >>= prof_shift;
        /*
...
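The stubs built by BUILD_IRQ push the IRQ number minus 256 into the orig_eax slot, which is why do_IRQ masks with 0xff and why the comment calls it subtle: any negative orig_eax means "interrupted", while a small non-negative value means "inside that syscall". A self-contained check of the encoding:

    #include <assert.h>

    /* IRQ stubs push (nr - 512)/... effectively a value below zero; the
     * convention modelled here is nr - 256, negative for every nr in
     * 0..255, with do_IRQ recovering nr by masking the low byte. */
    int main(void)
    {
        for (int nr = 0; nr < 256; nr++) {
            long orig_eax = nr - 256;         /* what a stub would push */
            assert(orig_eax < 0);             /* never a valid syscall nr */
            assert((orig_eax & 0xff) == nr);  /* do_IRQ's recovery step */
        }
        return 0;
    }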
@@ -807,7 +807,7 @@ int get_cpuinfo(char * buffer)
             c->x86_model,
             c->x86_model_id[0] ? c->x86_model_id : "unknown");
 
-   if (c->x86_mask)
+   if (c->x86_mask || c->cpuid_level >= 0)
        p += sprintf(p, "stepping\t: %d\n", c->x86_mask);
    else
        p += sprintf(p, "stepping\t: unknown\n");
...
@@ -42,7 +42,7 @@
 #include "irq.h"
 
-extern unsigned long start_kernel, _etext;
+extern unsigned long start_kernel;
 extern void update_one_process( struct task_struct *p,
                unsigned long ticks, unsigned long user,
                unsigned long system, int cpu);
@@ -319,8 +319,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
                    printk("Processor #%d unused. (Max %d processors).\n",m->mpc_apicid, NR_CPUS);
                else
                {
+                   int ver = m->mpc_apicver;
+
                    cpu_present_map|=(1<<m->mpc_apicid);
-                   apic_version[m->mpc_apicid]=m->mpc_apicver;
+                   /*
+                    * Validate version
+                    */
+                   if (ver == 0x0) {
+                       printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+                       ver = 0x10;
+                   }
+                   apic_version[m->mpc_apicid] = ver;
                }
            }
            mpt+=sizeof(*m);
@@ -1806,8 +1815,10 @@ asmlinkage void smp_mtrr_interrupt(void)
  */
 asmlinkage void smp_spurious_interrupt(void)
 {
-   /* ack_APIC_irq();  see sw-dev-man vol 3, chapter 7.4.13.5 */
-   printk("spurious APIC interrupt, ayiee, should never happen.\n");
+   ack_APIC_irq();
+   /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+   printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
+          smp_processor_id());
 }
 
 /*
@@ -2058,3 +2069,4 @@ int setup_profiling_timer(unsigned int multiplier)
 }
 
 #undef APIC_DIVISOR
@@ -42,6 +42,8 @@
 #include <asm/lithium.h>
 #endif
 
+#include "irq.h"
+
 asmlinkage int system_call(void);
 asmlinkage void lcall7(void);
@@ -125,7 +127,6 @@ static void show_registers(struct pt_regs *regs)
    unsigned long esp;
    unsigned short ss;
    unsigned long *stack, addr, module_start, module_end;
-   extern char _stext, _etext;
 
    esp = (unsigned long) (1+regs);
    ss = __KERNEL_DS;
@@ -669,9 +670,6 @@ cobalt_init(void)
 #endif
 void __init trap_init(void)
 {
-   /* Initially up all of the IDT to jump to unexpected */
-   init_unexpected_irq();
-
    if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
        EISA_bus = 1;
    set_call_gate(&default_ldt,lcall7);
@@ -693,7 +691,7 @@ void __init trap_init(void)
    set_trap_gate(15,&spurious_interrupt_bug);
    set_trap_gate(16,&coprocessor_error);
    set_trap_gate(17,&alignment_check);
-   set_system_gate(0x80,&system_call);
+   set_system_gate(SYSCALL_VECTOR,&system_call);
 
    /* set up GDT task & ldt entries */
    set_tss_desc(0, &init_task.tss);
...
@@ -31,6 +31,15 @@ ENTRY(__down_failed_interruptible)
    popl %edx   /* restore %edx */
    ret
 
+/* Don't save/restore %eax, because that will be our return value */
+ENTRY(__down_failed_trylock)
+   pushl %edx  /* save %edx */
+   pushl %ecx  /* save %ecx (and argument) */
+   call SYMBOL_NAME(__down_trylock)
+   popl %ecx   /* restore %ecx (count on __down_trylock not changing it) */
+   popl %edx   /* restore %edx */
+   ret
+
 ENTRY(__up_wakeup)
    pushl %eax  /* save %eax */
    pushl %edx  /* save %edx */
...
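__down_failed_trylock is the contended slow path for the new down_trylock(): instead of sleeping it reports failure, undoing the count decrement via __down_trylock in the C code further down. A user-space model of the semantics only, with a plain mutex standing in for the kernel's atomics and spinlock; 0 means the semaphore was acquired, 1 means it was busy:

    #include <pthread.h>

    /* Illustrative types; not the kernel's struct semaphore. */
    struct sem { int count; pthread_mutex_t lock; };

    static int down_trylock(struct sem *s)
    {
        int failed;
        pthread_mutex_lock(&s->lock);
        if (s->count > 0) {
            s->count--;      /* grabbed it */
            failed = 0;
        } else {
            failed = 1;      /* busy: do NOT sleep, just report failure */
        }
        pthread_mutex_unlock(&s->lock);
        return failed;
    }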
@@ -120,6 +120,7 @@ static struct bttv bttvs[BTTV_MAX];
 
 #define I2C_TIMING (0x7<<4)
 #define I2C_DELAY  10
+
 #define I2C_SET(CTRL,DATA) \
     { btwrite((CTRL<<1)|(DATA), BT848_I2C); udelay(I2C_DELAY); }
 #define I2C_GET()   (btread(BT848_I2C)&1)
@@ -244,6 +245,7 @@ static void i2c_setlines(struct i2c_bus *bus,int ctrl,int data)
 {
    struct bttv *btv = (struct bttv*)bus->data;
    btwrite((ctrl<<1)|data, BT848_I2C);
+   btread(BT848_I2C);  /* flush buffers */
    udelay(I2C_DELAY);
 }
...
@@ -774,7 +774,6 @@ static int msp3410d_thread(void *data)
            goto done;
 
        dprintk("msp3410: thread: sleep\n");
        down_interruptible(&sem);
-       sem.owner = 0;
        dprintk("msp3410: thread: wakeup\n");
 
        if (msp->rmmod)
            goto done;
...
@@ -731,9 +731,6 @@ int idescsi_queue (Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
        printk (KERN_ERR "ide-scsi: drive id %d not present\n", cmd->target);
        goto abort;
    }
-   if (cmd->lun != 0) {    /* Only respond to LUN 0. Drop others */
-       goto abort;
-   }
    scsi = drive->driver_data;
    pc = kmalloc (sizeof (idescsi_pc_t), GFP_ATOMIC);
    rq = kmalloc (sizeof (struct request), GFP_ATOMIC);
...
@@ -1972,7 +1972,6 @@ scsi_error_handler(void * data)
         */
        SCSI_LOG_ERROR_RECOVERY(1,printk("Error handler sleeping\n"));
        down_interruptible (&sem);
-       sem.owner = 0;
 
        if (signal_pending(current) )
            break;
...
@@ -385,7 +385,9 @@ asmlinkage int sys_fdatasync(unsigned int fd)
        goto out_putf;
 
    /* this needs further work, at the moment it is identical to fsync() */
+   down(&inode->i_sem);
    err = file->f_op->fsync(file, dentry);
+   up(&inode->i_sem);
 
 out_putf:
    fput(file);
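Taking i_sem around ->fsync serializes fdatasync against concurrent writers on the same inode. A user-space shaped sketch of the pattern, with a pthread mutex standing in for the kernel semaphore and the types trimmed to what the example needs:

    #include <pthread.h>

    struct inode { pthread_mutex_t i_sem; };   /* illustrative only */

    static int do_fdatasync(struct inode *inode, int (*fsync)(struct inode *))
    {
        int err;
        pthread_mutex_lock(&inode->i_sem);
        err = fsync(inode);            /* flush while holding the lock */
        pthread_mutex_unlock(&inode->i_sem);
        return err;
    }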
@@ -812,8 +814,8 @@ void refile_buffer(struct buffer_head * buf)
     * If too high a percentage of the buffers are dirty...
     */
    if (nr_buffers_type[BUF_DIRTY] > too_many ||
-       (size_buffers_type[BUF_DIRTY] + size_buffers_type[BUF_LOCKED])/PAGE_SIZE > too_large) {
-       if (nr_buffers_type[BUF_LOCKED] > 2 * bdf_prm.b_un.ndirty)
+       size_buffers_type[BUF_DIRTY]/PAGE_SIZE > too_large) {
+       if (nr_buffers_type[BUF_LOCKED] > 3 * bdf_prm.b_un.ndirty)
            wakeup_bdflush(1);
        else
            wakeup_bdflush(0);
@@ -1767,7 +1769,7 @@ int bdflush(void * unused)
 #ifdef DEBUG
         for(nlist = 0; nlist < NR_LIST; nlist++)
 #else
-        for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
+        for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
 #endif
         {
             ndirty = 0;
@@ -1786,11 +1788,16 @@ int bdflush(void * unused)
              }
 
              /* Clean buffer on dirty list?  Refile it */
-             if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
-             {
-                 refile_buffer(bh);
-                 continue;
-             }
+             if (nlist == BUF_DIRTY && !buffer_dirty(bh)) {
+                 refile_buffer(bh);
+                 continue;
+             }
+
+             /* Unlocked buffer on locked list?  Refile it */
+             if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
+                 refile_buffer(bh);
+                 continue;
+             }
 
              if (buffer_locked(bh) || !buffer_dirty(bh))
                  continue;
...
+1999-01-30  a sun <asun@hecate.darksunrising.blah>
+
+	* catalog.c (hfs_cat_move): fixed corruption problem with
+	renames.
+
+1999-01-27  a sun <asun@hecate.darksunrising.blah>
+
+	* file_hdr.c (get/set_dates): got rid of broken afpd times. NOTE:
+	you must use netatalk-1.4b2+asun2.1.2 or newer for this.
+
 1998-12-20  a sun <asun@hecate.darksunrising.blah>
 
 	* bdelete.c (del_root): assign bthLNode and bthFNode only if the
...
@@ -1348,7 +1348,7 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
        hfs_sleep_on(&mdb->rename_wait);
    }
    spin_lock(&entry_lock);
-   mdb->rename_lock = 1;
+   mdb->rename_lock = 1; /* XXX: should be atomic_inc */
    spin_unlock(&entry_lock);
 
    /* keep readers from getting confused by changing dir size */
@@ -1385,7 +1385,6 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
 restart:
    /* see if the destination exists, getting it if it does */
    dest = hfs_cat_get(mdb, new_key);
-
    if (!dest) {
        /* destination doesn't exist, so create it */
        struct hfs_cat_rec new_record;
@@ -1408,14 +1407,16 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
            goto bail3;
        }
-       /* build the new record */
+       /* build the new record. make sure to zero out the
+          record. */
+       memset(&new_record, 0, sizeof(new_record));
        new_record.cdrType = entry->type;
        __write_entry(entry, &new_record);
 
        /* insert the new record */
        error = hfs_binsert(mdb->cat_tree, HFS_BKEY(new_key),
                    &new_record, is_dir ? 2 + sizeof(DIR_REC) :
                    2 + sizeof(FIL_REC));
        if (error == -EEXIST) {
            delete_entry(dest);
            unlock_entry(dest);
@@ -1565,7 +1566,7 @@ int hfs_cat_move(struct hfs_cat_entry *old_dir, struct hfs_cat_entry *new_dir,
    }
    end_write(new_dir);
    spin_lock(&entry_lock);
-   mdb->rename_lock = 0;
+   mdb->rename_lock = 0; /* XXX: should use atomic_dec */
    hfs_wake_up(&mdb->rename_wait);
    spin_unlock(&entry_lock);
...
@@ -303,16 +303,9 @@ static inline void adjust_forks(struct hfs_cat_entry *entry,
 static void get_dates(const struct hfs_cat_entry *entry,
               const struct inode *inode,  hfs_u32 dates[3])
 {
-   if (HFS_SB(inode->i_sb)->s_afpd) {
-       /* AFPD compatible: use un*x times */
-       dates[0] = htonl(hfs_m_to_utime(entry->create_date));
-       dates[1] = htonl(hfs_m_to_utime(entry->modify_date));
-       dates[2] = htonl(hfs_m_to_utime(entry->backup_date));
-   } else {
-       dates[0] = hfs_m_to_htime(entry->create_date);
-       dates[1] = hfs_m_to_htime(entry->modify_date);
-       dates[2] = hfs_m_to_htime(entry->backup_date);
-   }
+   dates[0] = hfs_m_to_htime(entry->create_date);
+   dates[1] = hfs_m_to_htime(entry->modify_date);
+   dates[2] = hfs_m_to_htime(entry->backup_date);
 }
 
 /*
@@ -322,43 +315,23 @@ static void set_dates(struct hfs_cat_entry *entry, struct inode *inode,
               const hfs_u32 *dates)
 {
    hfs_u32 tmp;
-   if (HFS_SB(inode->i_sb)->s_afpd) {
-       /* AFPD compatible: use un*x times */
-       tmp = hfs_u_to_mtime(ntohl(dates[0]));
-       if (entry->create_date != tmp) {
-           entry->create_date = tmp;
-           hfs_cat_mark_dirty(entry);
-       }
-       tmp = hfs_u_to_mtime(ntohl(dates[1]));
-       if (entry->modify_date != tmp) {
-           entry->modify_date = tmp;
-           inode->i_ctime = inode->i_atime = inode->i_mtime =
-               ntohl(dates[1]);
-           hfs_cat_mark_dirty(entry);
-       }
-       tmp = hfs_u_to_mtime(ntohl(dates[2]));
-       if (entry->backup_date != tmp) {
-           entry->backup_date = tmp;
-           hfs_cat_mark_dirty(entry);
-       }
-   } else {
-       tmp = hfs_h_to_mtime(dates[0]);
-       if (entry->create_date != tmp) {
-           entry->create_date = tmp;
-           hfs_cat_mark_dirty(entry);
-       }
-       tmp = hfs_h_to_mtime(dates[1]);
-       if (entry->modify_date != tmp) {
-           entry->modify_date = tmp;
-           inode->i_ctime = inode->i_atime = inode->i_mtime =
-               hfs_h_to_utime(dates[1]);
-           hfs_cat_mark_dirty(entry);
-       }
-       tmp = hfs_h_to_mtime(dates[2]);
-       if (entry->backup_date != tmp) {
-           entry->backup_date = tmp;
-           hfs_cat_mark_dirty(entry);
-       }
-   }
+   tmp = hfs_h_to_mtime(dates[0]);
+   if (entry->create_date != tmp) {
+       entry->create_date = tmp;
+       hfs_cat_mark_dirty(entry);
+   }
+   tmp = hfs_h_to_mtime(dates[1]);
+   if (entry->modify_date != tmp) {
+       entry->modify_date = tmp;
+       inode->i_ctime = inode->i_atime = inode->i_mtime =
+           hfs_h_to_utime(dates[1]);
+       hfs_cat_mark_dirty(entry);
+   }
+   tmp = hfs_h_to_mtime(dates[2]);
+   if (entry->backup_date != tmp) {
+       entry->backup_date = tmp;
+       hfs_cat_mark_dirty(entry);
+   }
 }
...
@@ -478,7 +478,7 @@ nlmclnt_unlock_callback(struct rpc_task *task)
    int     status = req->a_res.status;
 
    if (RPC_ASSASSINATED(task))
-       return;
+       goto die;
 
    if (task->tk_status < 0) {
        dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
@@ -490,6 +490,9 @@ nlmclnt_unlock_callback(struct rpc_task *task)
     && status != NLM_LCK_DENIED_GRACE_PERIOD) {
        printk("lockd: unexpected unlock status: %d\n", status);
    }
+
+die:
+   rpc_release_task(task);
 }
 
 /*
@@ -565,6 +568,7 @@ nlmclnt_cancel_callback(struct rpc_task *task)
    }
 
 die:
+   rpc_release_task(task);
    nlm_release_host(req->a_host);
    kfree(req);
    return;
...
@@ -561,6 +561,7 @@ nlmsvc_grant_callback(struct rpc_task *task)
    block->b_incall = 0;
 
    nlm_release_host(call->a_host);
+   rpc_release_task(task);
 }
 
 /*
...
@@ -492,6 +492,7 @@ nlmsvc_callback_exit(struct rpc_task *task)
            task->tk_pid, -task->tk_status);
    }
    nlm_release_host(call->a_host);
+   rpc_release_task(task);
    kfree(call);
 }
...
@@ -734,7 +734,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
     * directories via NFS.
     */
    err = 0;
-   if ((iap->ia_valid &= (ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
+   if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
        err = nfsd_setattr(rqstp, resfhp, iap);
 out:
    return err;
...
@@ -144,7 +144,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, count, offset, wsize);
        result = smb_proc_write(dentry, offset, wsize, buffer);
        if (result < 0)
-           goto io_error;
+           break;
        /* N.B. what if result < wsize?? */
 #ifdef SMBFS_PARANOIA
        if (result < wsize)
@@ -162,15 +162,7 @@ printk("smb_writepage_sync: short write, wsize=%d, result=%d\n", wsize, result);
            inode->i_size = offset;
        inode->u.smbfs_i.cache_valid |= SMB_F_LOCALWRITE;
    } while (count);
-out:
-   smb_unlock_page(page);
    return written ? written : result;
-
-io_error:
-   /* Must mark the page invalid after I/O error */
-   clear_bit(PG_uptodate, &page->flags);
-   goto out;
 }
 
 /*
@@ -190,6 +182,7 @@ smb_writepage(struct file *file, struct page *page)
    set_bit(PG_locked, &page->flags);
    atomic_inc(&page->count);
    result = smb_writepage_sync(dentry, page, 0, PAGE_SIZE);
+   smb_unlock_page(page);
    free_page(page_address(page));
    return result;
 }
...
@@ -13,11 +13,15 @@
 #define TIMER_IRQ 0
 
 /*
- * 16 XT IRQ's, 8 potential APIC interrupt sources.
- * Right now the APIC is only used for SMP, but this
- * may change.
+ * 16 8259A IRQ's, 240 potential APIC interrupt sources.
+ * Right now the APIC is mostly only used for SMP.
+ * 256 vectors is an architectural limit. (we can have
+ * more than 256 devices theoretically, but they will
+ * have to use shared interrupts)
+ * Since vectors 0x00-0x1f are used/reserved for the CPU,
+ * the usable vector space is 0x20-0xff (224 vectors)
  */
-#define NR_IRQS 64
+#define NR_IRQS 224
 
 static __inline__ int irq_cannonicalize(int irq)
 {
...
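The 224 is simple arithmetic: the IDT has 256 vectors, the architecture reserves 0x00-0x1f (32 vectors) for CPU exceptions, and 256 - 32 = 224 remain at 0x20-0xff, one of which (0x80) is further claimed by int 0x80. A one-liner making the derivation explicit (illustrative spelling, not the header's):

    /* 256 IDT vectors minus the 32 CPU-reserved exception vectors: */
    #define NR_IRQS (256 - 32)   /* == 224, covering vectors 0x20..0xff */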
#ifndef _I386_SEMAPHORE_HELPER_H
#define _I386_SEMAPHORE_HELPER_H
/*
* SMP- and interrupt-safe semaphores helper functions.
*
* (C) Copyright 1996 Linus Torvalds
* (C) Copyright 1999 Andrea Arcangeli
*/
/*
* These two _must_ execute atomically wrt each other.
*
* This is trivially done with load_locked/store_cond,
* but on the x86 we need an external synchronizer.
*/
static inline void wake_one_more(struct semaphore * sem)
{
unsigned long flags;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (atomic_read(&sem->count) <= 0)
sem->waking++;
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
}
static inline int waking_non_zero(struct semaphore *sem)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking > 0) {
sem->waking--;
ret = 1;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
/*
* waking_non_zero_interruptible:
* 1 got the lock
* 0 go to sleep
* -EINTR interrupted
*
* We must undo the sem->count down_interruptible() increment while we are
* protected by the spinlock in order to make atomic this atomic_inc() with the
* atomic_read() in wake_one_more(), otherwise we can race. -arca
*/
static inline int waking_non_zero_interruptible(struct semaphore *sem,
struct task_struct *tsk)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking > 0) {
sem->waking--;
ret = 1;
} else if (signal_pending(tsk)) {
atomic_inc(&sem->count);
ret = -EINTR;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
/*
* waking_non_zero_trylock:
* 1 failed to lock
* 0 got the lock
*
* We must undo the sem->count down_trylock() increment while we are
* protected by the spinlock in order to make atomic this atomic_inc() with the
* atomic_read() in wake_one_more(), otherwise we can race. -arca
*/
static inline int waking_non_zero_trylock(struct semaphore *sem)
{
unsigned long flags;
int ret = 1;
spin_lock_irqsave(&semaphore_wake_lock, flags);
if (sem->waking <= 0)
atomic_inc(&sem->count);
else {
sem->waking--;
ret = 0;
}
spin_unlock_irqrestore(&semaphore_wake_lock, flags);
return ret;
}
#endif
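All four helpers take semaphore_wake_lock around both the "waking" credit and any count fixup, which is the heart of the fix: a wakeup granted by up() can neither be lost nor consumed twice, and an interrupted sleeper re-increments count under the same lock it is examined under. A user-space model of that protocol, with pthread primitives standing in for the spinlock and scheduler; the struct and names here are illustrative, not the kernel's:

    #include <pthread.h>

    struct ksem {
        int count;    /* may go negative: -(number of sleepers) */
        int waking;   /* wakeup credits granted by up() */
        pthread_mutex_t lock;
        pthread_cond_t  wait;
    };

    static void k_down(struct ksem *s)
    {
        pthread_mutex_lock(&s->lock);
        if (--s->count >= 0) {            /* fast path: uncontended */
            pthread_mutex_unlock(&s->lock);
            return;
        }
        for (;;) {                        /* slow path: wait for a credit */
            if (s->waking > 0) { s->waking--; break; }
            pthread_cond_wait(&s->wait, &s->lock);
        }
        pthread_mutex_unlock(&s->lock);
    }

    static void k_up(struct ksem *s)
    {
        pthread_mutex_lock(&s->lock);
        if (++s->count <= 0)              /* someone is asleep */
            s->waking++;                  /* grant exactly one credit */
        pthread_cond_broadcast(&s->wait);
        pthread_mutex_unlock(&s->lock);
    }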
@@ -12,6 +12,11 @@
  *	the original code and to make semaphore waits
  *	interruptible so that processes waiting on
  *	semaphores can be killed.
+ *	Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
+ *	functions in asm/sempahore-helper.h while fixing a
+ *	potential and subtle race discovered by Ulrich Schmid
+ *	in down_interruptible(). Since I started to play here I
+ *	also implemented the `trylock' semaphore operation.
  *
  * If you would like to see an analysis of this implementation, please
  * ftp to gcom.com and download the file
@@ -23,131 +28,29 @@
 #include <asm/atomic.h>
 #include <asm/spinlock.h>
-/*
- * Semaphores are recursive: we allow the holder process
- * to recursively do down() operations on a semaphore that
- * the process already owns. In order to do that, we need
- * to keep a semaphore-local copy of the owner and the
- * "depth of ownership".
- *
- * NOTE! Nasty memory ordering rules:
- *  - "owner" and "owner_count" may only be modified once you hold the
- *    lock.
- *  - "owner_count" must be written _after_ modifying owner, and
- *    must be read _before_ reading owner. There must be appropriate
- *    write and read barriers to enforce this.
- *
- * On an x86, writes are always ordered, so the only enformcement
- * necessary is to make sure that the owner_depth is written after
- * the owner value in program order.
- *
- * For read ordering guarantees, the semaphore wake_lock spinlock
- * is already giving us ordering guarantees.
- *
- * Other (saner) architectures would use "wmb()" and "rmb()" to
- * do this in a more obvious manner.
- */
 struct semaphore {
    atomic_t count;
-   unsigned long owner, owner_depth;
    int waking;
    struct wait_queue * wait;
 };
 
-/*
- * Because we want the non-contention case to be
- * fast, we save the stack pointer into the "owner"
- * field, and to get the true task pointer we have
- * to do the bit masking. That moves the masking
- * operation into the slow path.
- */
-#define semaphore_owner(sem) \
-   ((struct task_struct *)((2*PAGE_MASK) & (sem)->owner))
-
-#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, 0, 0, NULL })
-#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, 1, 0, NULL })
+#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, NULL })
+#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, NULL })
 asmlinkage void __down_failed(void /* special register calling convention */);
 asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
+asmlinkage int  __down_failed_trylock(void  /* params in registers */);
 asmlinkage void __up_wakeup(void /* special register calling convention */);
 
 asmlinkage void __down(struct semaphore * sem);
 asmlinkage int  __down_interruptible(struct semaphore * sem);
+asmlinkage int  __down_trylock(struct semaphore * sem);
 asmlinkage void __up(struct semaphore * sem);
 
 extern spinlock_t semaphore_wake_lock;
 
 #define sema_init(sem, val)	atomic_set(&((sem)->count), (val))
-/*
- * These two _must_ execute atomically wrt each other.
- *
- * This is trivially done with load_locked/store_cond,
- * but on the x86 we need an external synchronizer.
- */
-static inline void wake_one_more(struct semaphore * sem)
-{
-   unsigned long flags;
-
-   spin_lock_irqsave(&semaphore_wake_lock, flags);
-   sem->waking++;
-   spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-}
-
-/*
- * NOTE NOTE NOTE!
- *
- * We read owner-count _before_ getting the semaphore. This
- * is important, because the semaphore also acts as a memory
- * ordering point between reading owner_depth and reading
- * the owner.
- *
- * Why is this necessary? The "owner_depth" essentially protects
- * us from using stale owner information - in the case that this
- * process was the previous owner but somebody else is racing to
- * aquire the semaphore, the only way we can see ourselves as an
- * owner is with "owner_depth" of zero (so that we know to avoid
- * the stale value).
- *
- * In the non-race case (where we really _are_ the owner), there
- * is not going to be any question about what owner_depth is.
- *
- * In the race case, the race winner will not even get here, because
- * it will have successfully gotten the semaphore with the locked
- * decrement operation.
- *
- * Basically, we have two values, and we cannot guarantee that either
- * is really up-to-date until we have aquired the semaphore. But we
- * _can_ depend on a ordering between the two values, so we can use
- * one of them to determine whether we can trust the other:
- *
- * Cases:
- *  - owner_depth == zero: ignore the semaphore owner, because it
- *    cannot possibly be us. Somebody else may be in the process
- *    of modifying it and the zero may be "stale", but it sure isn't
- *    going to say that "we" are the owner anyway, so who cares?
- *  - owner_depth is non-zero. That means that even if somebody
- *    else wrote the non-zero count value, the write ordering requriement
- *    means that they will have written themselves as the owner, so
- *    if we now see ourselves as an owner we can trust it to be true.
- */
-static inline int waking_non_zero(struct semaphore *sem, struct task_struct *tsk)
-{
-   unsigned long flags;
-   unsigned long owner_depth = sem->owner_depth;
-   int ret = 0;
-
-   spin_lock_irqsave(&semaphore_wake_lock, flags);
-   if (sem->waking > 0 || (owner_depth && semaphore_owner(sem) == tsk)) {
-       sem->owner = (unsigned long) tsk;
-       sem->owner_depth++; /* Don't use the possibly stale value */
-       sem->waking--;
-       ret = 1;
-   }
-   spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-   return ret;
-}
 /*
  * This is ugly, but we want the default case to fall through.
  * "down_failed" is a special asm handler that calls the C
@@ -161,9 +64,7 @@ extern inline void down(struct semaphore * sem)
        "lock ; "
 #endif
        "decl 0(%0)\n\t"
-       "js 2f\n\t"
-       "movl %%esp,4(%0)\n"
-       "movl $1,8(%0)\n\t"
+       "js 2f\n"
        "1:\n"
        ".section .text.lock,\"ax\"\n"
        "2:\tpushl $1b\n\t"
@@ -185,8 +86,6 @@ extern inline int down_interruptible(struct semaphore * sem)
 #endif
        "decl 0(%1)\n\t"
        "js 2f\n\t"
-       "movl %%esp,4(%1)\n\t"
-       "movl $1,8(%1)\n\t"
        "xorl %0,%0\n"
        "1:\n"
        ".section .text.lock,\"ax\"\n"
@@ -199,6 +98,28 @@ extern inline int down_interruptible(struct semaphore * sem)
    return result;
 }
+extern inline int down_trylock(struct semaphore * sem)
+{
+   int result;
+
+   __asm__ __volatile__(
+       "# atomic interruptible down operation\n\t"
+#ifdef __SMP__
+       "lock ; "
+#endif
+       "decl 0(%1)\n\t"
+       "js 2f\n\t"
+       "xorl %0,%0\n"
+       "1:\n"
+       ".section .text.lock,\"ax\"\n"
+       "2:\tpushl $1b\n\t"
+       "jmp __down_failed_trylock\n"
+       ".previous"
+       :"=a" (result)
+       :"c" (sem)
+       :"memory");
+   return result;
+}
 /*
  * Note! This is subtle. We jump to wake people up only if
@@ -210,7 +131,6 @@ extern inline void up(struct semaphore * sem)
 {
    __asm__ __volatile__(
        "# atomic up operation\n\t"
-       "decl 8(%0)\n\t"
 #ifdef __SMP__
        "lock ; "
 #endif
...
@@ -453,8 +453,7 @@ struct sock {
 #ifdef CONFIG_FILTER
    /* Socket Filtering Instructions */
-   int         filter;
-   struct sock_filter  *filter_data;
+   struct sk_filter    *filter;
 #endif /* CONFIG_FILTER */
 
    /* This is where all the private (optional) areas that don't
@@ -790,11 +789,11 @@ extern void sklist_destroy_socket(struct sock **list, struct sock *sk);
  * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data.
  */
-extern __inline__ int sk_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+extern __inline__ int sk_filter(struct sk_buff *skb, struct sk_filter *filter)
 {
    int pkt_len;
 
-   pkt_len = sk_run_filter(skb->data, skb->len, filter, flen);
+   pkt_len = sk_run_filter(skb, filter->insns, filter->len);
    if(!pkt_len)
        return 1;   /* Toss Packet */
    else
@@ -802,6 +801,23 @@ extern __inline__ int sk_filter(struct sk_buff *skb, struct sock_filter *filter,
    return 0;
 }
+extern __inline__ void sk_filter_release(struct sock *sk, struct sk_filter *fp)
+{
+   unsigned int size = sk_filter_len(fp);
+
+   atomic_sub(size, &sk->omem_alloc);
+
+   if (atomic_dec_and_test(&fp->refcnt))
+       kfree_s(fp, size);
+}
+
+extern __inline__ void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+{
+   atomic_inc(&fp->refcnt);
+   atomic_add(sk_filter_len(fp), &sk->omem_alloc);
+}
 #endif /* CONFIG_FILTER */
 
 /*
@@ -837,11 +853,8 @@ extern __inline__ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
        return -ENOMEM;
 
 #ifdef CONFIG_FILTER
-   if (sk->filter)
-   {
-       if (sk_filter(skb, sk->filter_data, sk->filter))
-           return -EPERM;  /* Toss packet */
-   }
+   if (sk->filter && sk_filter(skb, sk->filter))
+       return -EPERM;  /* Toss packet */
 #endif /* CONFIG_FILTER */
 
    skb_set_owner_r(skb, sk);
...
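The socket filter moves from a bare instruction array to a refcounted sk_filter object billed against the socket's option memory, so a shared filter can be torn down safely. A compact sketch of the accounting pattern the two inline helpers implement; plain ints stand in for atomic_t, and the struct layouts are trimmed to what the example needs:

    #include <stdlib.h>

    struct sock_filter { unsigned short code; unsigned char jt, jf; unsigned int k; };

    struct sk_filter {
        int refcnt;
        unsigned int len;                 /* number of insns */
        struct sock_filter insns[];
    };

    static unsigned int sk_filter_len(struct sk_filter *fp)
    {
        return sizeof(*fp) + fp->len * sizeof(struct sock_filter);
    }

    struct sock { unsigned int omem_alloc; struct sk_filter *filter; };

    static void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
    {
        fp->refcnt++;
        sk->omem_alloc += sk_filter_len(fp);   /* bill the socket */
    }

    static void sk_filter_release(struct sock *sk, struct sk_filter *fp)
    {
        sk->omem_alloc -= sk_filter_len(fp);
        if (--fp->refcnt == 0)
            free(fp);                          /* last user gone */
    }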
@@ -912,7 +912,7 @@ static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *
  * can generate.
  */
 extern __inline__ void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
-                        int offer_wscale, int wscale, __u32 tstamp)
+                        int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
 {
    /* We always get an MSS option.
     * The option bytes which will be seen in normal data
@@ -936,7 +936,7 @@ extern __inline__ void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sa
        *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                      (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
        *ptr++ = htonl(tstamp);     /* TSVAL */
-       *ptr++ = __constant_htonl(0);   /* TSECR */
+       *ptr++ = htonl(ts_recent);  /* TSECR */
    } else if(sack)
        *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                      (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
...
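A SYN/ACK must echo the peer's timestamp rather than zero, and the new ts_recent argument carries the value remembered from the received SYN. A standalone sketch of the option layout being fixed, using the standard TCP option constants:

    #include <stdint.h>
    #include <arpa/inet.h>

    #define TCPOPT_NOP        1
    #define TCPOPT_TIMESTAMP  8
    #define TCPOLEN_TIMESTAMP 10

    static uint32_t *build_tstamp_option(uint32_t *ptr, uint32_t tstamp,
                                         uint32_t ts_recent)
    {
        *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
        *ptr++ = htonl(tstamp);     /* TSVAL: our clock */
        *ptr++ = htonl(ts_recent);  /* TSECR: echo of the peer's TSVAL */
        return ptr;
    }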
@@ -61,7 +61,7 @@ extern int request_dma(unsigned int dmanr, char * deviceID);
 extern void free_dma(unsigned int dmanr);
 extern spinlock_t dma_spin_lock;
 
-#ifdef MODVERSIONS
+#ifdef CONFIG_MODVERSIONS
 const struct module_symbol __export_Using_Versions
 __attribute__((section("__ksymtab"))) = {
    1 /* Version version */, "Using_Versions"
@@ -322,6 +322,8 @@ EXPORT_SYMBOL(printk);
 EXPORT_SYMBOL(sprintf);
 EXPORT_SYMBOL(vsprintf);
 EXPORT_SYMBOL(kdevname);
+EXPORT_SYMBOL(bdevname);
+EXPORT_SYMBOL(cdevname);
 EXPORT_SYMBOL(simple_strtoul);
 EXPORT_SYMBOL(system_utsname);	/* UTS data */
 EXPORT_SYMBOL(uts_sem);		/* UTS semaphore */
@@ -370,6 +372,7 @@ EXPORT_SYMBOL(is_bad_inode);
 EXPORT_SYMBOL(event);
 EXPORT_SYMBOL(__down);
 EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__down_trylock);
 EXPORT_SYMBOL(__up);
 EXPORT_SYMBOL(brw_page);
...
@@ -36,6 +36,7 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
+#include <asm/semaphore-helper.h>
 
 #include <linux/timex.h>
@@ -863,30 +864,28 @@ void __up(struct semaphore *sem)
    struct task_struct *tsk = current;	\
    struct wait_queue wait = { tsk, NULL };
 
 #define DOWN_HEAD(task_state)						\
									\
									\
    tsk->state = (task_state);					\
    add_wait_queue(&sem->wait, &wait);				\
									\
    /*								\
     * Ok, we're set up.  sem->count is known to be less than zero	\
     * so we must wait.						\
     *								\
     * We can let go the lock for purposes of waiting.		\
     * We re-acquire it after awaking so as to protect		\
     * all semaphore operations.					\
     *								\
     * If "up()" is called before we call waking_non_zero() then	\
     * we will catch it right away.  If it is called later then	\
     * we will have to go through a wakeup cycle to catch it.	\
     *								\
     * Multiple waiters contend for the semaphore lock to see	\
     * who gets to gate through and who has to wait some more.	\
     */								\
-   for (;;) {							\
-       if (waking_non_zero(sem, tsk))	/* are we waking up?  */	\
-           break;			/* yes, exit loop */
+   for (;;) {
 
 #define DOWN_TAIL(task_state)			\
    tsk->state = (task_state);		\
@@ -898,6 +897,8 @@ void __down(struct semaphore * sem)
 {
    DOWN_VAR
    DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+   if (waking_non_zero(sem))
+       break;
    schedule();
    DOWN_TAIL(TASK_UNINTERRUPTIBLE)
 }
@@ -907,10 +908,13 @@ int __down_interruptible(struct semaphore * sem)
    DOWN_VAR
    int ret = 0;
    DOWN_HEAD(TASK_INTERRUPTIBLE)
-   if (signal_pending(tsk))
+   ret = waking_non_zero_interruptible(sem, tsk);
+   if (ret)
    {
-       ret = -EINTR;			/* interrupted */
-       atomic_inc(&sem->count);	/* give up on down operation */
+       if (ret == 1)
+           /* ret != 0 only if we get interrupted -arca */
+           ret = 0;
        break;
    }
    schedule();
@@ -918,6 +922,11 @@ int __down_interruptible(struct semaphore * sem)
    return ret;
 }
 
+int __down_trylock(struct semaphore * sem)
+{
+   return waking_non_zero_trylock(sem);
+}
+
 #define SLEEP_ON_VAR				\
    unsigned long flags;			\
    struct wait_queue wait;
...
@@ -11,6 +11,8 @@
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
+ *
+ * Andi Kleen - Fix a few bad bugs and races.
  */
 
 #include <linux/config.h>
@@ -36,6 +38,22 @@
 #include <asm/uaccess.h>
 #include <linux/filter.h>
/* No hurry in this branch */
static u8 *load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
if (k>=SKF_NET_OFF)
ptr = skb->nh.raw + k - SKF_NET_OFF;
else if (k>=SKF_LL_OFF)
ptr = skb->mac.raw + k - SKF_LL_OFF;
if (ptr<skb->head && ptr < skb->tail)
return ptr;
return NULL;
}
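load_pointer() is what lets a filter address bytes relative to a header rather than to skb->data: negative k values below SKF_NET_OFF or SKF_LL_OFF are resolved against skb->nh.raw and skb->mac.raw. A hedged user-space sketch that leans on this (tcp_only is a hypothetical name; the constants come from <linux/filter.h>):

	#include <netinet/in.h>		/* IPPROTO_TCP */
	#include <linux/filter.h>	/* BPF_*, SKF_NET_OFF, struct sock_filter */

	/* Keep TCP packets and drop everything else, by reading the IP
	 * protocol byte relative to the network header. */
	static struct sock_filter tcp_only[] = {
		{ BPF_LD|BPF_B|BPF_ABS,  0, 0, SKF_NET_OFF + 9 },	/* A = nh[9] */
		{ BPF_JMP|BPF_JEQ|BPF_K, 0, 1, IPPROTO_TCP },		/* TCP? */
		{ BPF_RET|BPF_K,         0, 0, 0xffff },		/* yes: keep */
		{ BPF_RET|BPF_K,         0, 0, 0 },			/* no: drop */
	};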
 /*
  * Decode and apply filter instructions to the skb->data.
  * Return length to keep, 0 for none. skb is the data we are

@@ -43,15 +61,19 @@
  * len is the number of filter blocks in the array.
  */

-int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
+int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 {
+	unsigned char *data = skb->data;
+	/* len is UNSIGNED. Byte wide insns relies only on implicit
+	   type casts to prevent reading arbitrary memory locations.
+	 */
+	unsigned int len = skb->len;
 	struct sock_filter *fentry;	/* We walk down these */
 	u32 A = 0;			/* Accumulator */
 	u32 X = 0;			/* Index Register */
 	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
 	int k;
 	int pc;
-	int *t;

 	/*
 	 * Process array of filter instructions.

@@ -60,53 +82,75 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
 	for (pc = 0; pc < flen; pc++)
 	{
 		fentry = &filter[pc];
-		if (fentry->code & BPF_X)
-			t = &X;
-		else
-			t = &fentry->k;

 		switch (fentry->code)
 		{
 			case BPF_ALU|BPF_ADD|BPF_X:
+				A += X;
+				continue;

 			case BPF_ALU|BPF_ADD|BPF_K:
-				A += *t;
+				A += fentry->k;
 				continue;

 			case BPF_ALU|BPF_SUB|BPF_X:
+				A -= X;
+				continue;

 			case BPF_ALU|BPF_SUB|BPF_K:
-				A -= *t;
+				A -= fentry->k;
 				continue;

 			case BPF_ALU|BPF_MUL|BPF_X:
+				A *= X;
+				continue;

 			case BPF_ALU|BPF_MUL|BPF_K:
-				A *= *t;
+				A *= X;
 				continue;

 			case BPF_ALU|BPF_DIV|BPF_X:
+				if (X == 0)
+					return (0);
+				A /= X;
+				continue;

 			case BPF_ALU|BPF_DIV|BPF_K:
-				if (*t == 0)
+				if (fentry->k == 0)
 					return (0);
-				A /= *t;
+				A /= fentry->k;
 				continue;

 			case BPF_ALU|BPF_AND|BPF_X:
+				A &= X;
+				continue;

 			case BPF_ALU|BPF_AND|BPF_K:
-				A &= *t;
+				A &= fentry->k;
 				continue;

 			case BPF_ALU|BPF_OR|BPF_X:
+				A |= X;
+				continue;

 			case BPF_ALU|BPF_OR|BPF_K:
-				A |= *t;
+				A |= fentry->k;
 				continue;

 			case BPF_ALU|BPF_LSH|BPF_X:
+				A <<= X;
+				continue;

 			case BPF_ALU|BPF_LSH|BPF_K:
-				A <<= *t;
+				A <<= fentry->k;
 				continue;

 			case BPF_ALU|BPF_RSH|BPF_X:
+				A >>= X;
+				continue;

 			case BPF_ALU|BPF_RSH|BPF_K:
-				A >>= *t;
+				A >>= fentry->k;
 				continue;

 			case BPF_ALU|BPF_NEG:
@@ -148,26 +192,62 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
 			case BPF_JMP|BPF_JSET|BPF_X:
 				pc += (A & X) ? fentry->jt : fentry->jf;
 				continue;

 			case BPF_LD|BPF_W|BPF_ABS:
 				k = fentry->k;
-				if (k + sizeof(long) > len)
-					return (0);
-				A = ntohl(*(long*)&data[k]);
+load_w:
+				if (k+sizeof(u32) <= len) {
+					A = ntohl(*(u32*)&data[k]);
 					continue;
+				}
+				if (k<0) {
+					u8 *ptr;
+
+					if (k>=SKF_AD_OFF)
+						break;
+					if ((ptr = load_pointer(skb, k)) != NULL) {
+						A = ntohl(*(u32*)ptr);
+						continue;
+					}
+				}
+				return 0;

 			case BPF_LD|BPF_H|BPF_ABS:
 				k = fentry->k;
-				if (k + sizeof(short) > len)
-					return (0);
-				A = ntohs(*(short*)&data[k]);
+load_h:
+				if (k + sizeof(u16) <= len) {
+					A = ntohs(*(u16*)&data[k]);
 					continue;
+				}
+				if (k<0) {
+					u8 *ptr;
+
+					if (k>=SKF_AD_OFF)
+						break;
+					if ((ptr = load_pointer(skb, k)) != NULL) {
+						A = ntohs(*(u16*)ptr);
+						continue;
+					}
+				}
+				return 0;

 			case BPF_LD|BPF_B|BPF_ABS:
 				k = fentry->k;
-				if (k >= len)
-					return (0);
-				A = data[k];
+load_b:
+				if (k < len) {
+					A = data[k];
 					continue;
+				}
+				if (k<0) {
+					u8 *ptr;
+
+					if (k>=SKF_AD_OFF)
+						break;
+					if ((ptr = load_pointer(skb, k)) != NULL) {
+						A = *ptr;
+						continue;
+					}
+				}
+				return 0;

 			case BPF_LD|BPF_W|BPF_LEN:
 				A = len;
@@ -177,35 +257,23 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
 				X = len;
 				continue;

 			case BPF_LD|BPF_W|BPF_IND:
 				k = X + fentry->k;
-				if (k + sizeof(u32) > len)
-					return (0);
-				A = ntohl(*(u32 *)&data[k]);
-				continue;
+				goto load_w;

 			case BPF_LD|BPF_H|BPF_IND:
 				k = X + fentry->k;
-				if (k + sizeof(u16) > len)
-					return (0);
-				A = ntohs(*(u16*)&data[k]);
-				continue;
+				goto load_h;

 			case BPF_LD|BPF_B|BPF_IND:
 				k = X + fentry->k;
-				if (k >= len)
-					return (0);
-				A = data[k];
-				continue;
+				goto load_b;

 			case BPF_LDX|BPF_B|BPF_MSH:
-				/*
-				 * Hack for BPF to handle TOS etc
-				 */
 				k = fentry->k;
 				if (k >= len)
 					return (0);
-				X = (data[fentry->k] & 0xf) << 2;
+				X = (data[k] & 0xf) << 2;
 				continue;

 			case BPF_LD|BPF_IMM:

@@ -216,7 +284,7 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
 				X = fentry->k;
 				continue;

 			case BPF_LD|BPF_MEM:
 				A = mem[fentry->k];
 				continue;
@@ -246,15 +314,29 @@ int sk_run_filter(unsigned char *data, int len, struct sock_filter *filter, int flen)
 				mem[fentry->k] = X;
 				continue;

 			default:
 				/* Invalid instruction counts as RET */
 				return (0);
 		}
+
+		/* Handle ancillary data, which are impossible
+		   (or very difficult) to get parsing packet contents.
+		 */
+		switch (k-SKF_AD_OFF) {
+		case SKF_AD_PROTOCOL:
+			A = htons(skb->protocol);
+			continue;
+		case SKF_AD_PKTTYPE:
+			A = skb->pkt_type;
+			continue;
+		case SKF_AD_IFINDEX:
+			A = skb->dev->ifindex;
+			continue;
+		default:
+			return 0;
+		}
 	}

+	printk(KERN_ERR "Filter ruleset ran off the end.\n");
 	return (0);
 }
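The ancillary switch exposes packet metadata that is not in the buffer itself (protocol, packet type, incoming interface) through the reserved offsets just below SKF_AD_OFF. A sketch of a filter that uses it; on_if1 is a hypothetical name and interface index 1 an arbitrary choice:

	static struct sock_filter on_if1[] = {
		/* A = skb->dev->ifindex via the ancillary load decoded above */
		{ BPF_LD|BPF_W|BPF_ABS,  0, 0, SKF_AD_OFF + SKF_AD_IFINDEX },
		{ BPF_JMP|BPF_JEQ|BPF_K, 0, 1, 1 },	/* arrived on ifindex 1? */
		{ BPF_RET|BPF_K,         0, 0, 0xffff },/* yes: keep packet */
		{ BPF_RET|BPF_K,         0, 0, 0 },	/* no: drop */
	};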
@@ -279,13 +361,17 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 		ftest = &filter[pc];
 		if (BPF_CLASS(ftest->code) == BPF_JMP)
 		{
 			/*
 			 * But they mustn't jump off the end.
 			 */
 			if (BPF_OP(ftest->code) == BPF_JA)
 			{
-				if (pc + ftest->k + 1 >= (unsigned)flen)
+				/* Note, the large ftest->k might cause
+				   loops. Compare this with conditional
+				   jumps below, where offsets are limited. --ANK (981016)
+				 */
+				if (ftest->k >= (unsigned)(flen-pc-1))
 					return (-EINVAL);
 			}
 			else

@@ -302,17 +388,18 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 		 * Check that memory operations use valid addresses.
 		 */
-		if (ftest->k < 0 || ftest->k >= BPF_MEMWORDS)
+		if (ftest->k >= BPF_MEMWORDS)
 		{
 			/*
 			 * But it might not be a memory operation...
 			 */
-			if (BPF_CLASS(ftest->code) == BPF_ST)
-				return -EINVAL;
-			if ((BPF_CLASS(ftest->code) == BPF_LD) &&
-			    (BPF_MODE(ftest->code) == BPF_MEM))
-				return (-EINVAL);
+			switch (ftest->code) {
+			case BPF_ST:
+			case BPF_STX:
+			case BPF_LD|BPF_MEM:
+			case BPF_LDX|BPF_MEM:
+				return -EINVAL;
+			}
 		}
 	}
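The rewritten check also catches BPF_STX and the BPF_LDX|BPF_MEM form, so every scratch-memory access is now bounds-checked against BPF_MEMWORDS (16 words). For example, this store slipped past the old class-based test but is now refused with -EINVAL (a complete program would of course still need a terminating BPF_RET):

	static struct sock_filter bad[] = {
		{ BPF_STX, 0, 0, BPF_MEMWORDS },	/* mem[16] = X: out of range */
	};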
@@ -332,35 +419,36 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 {
-	struct sock_filter *fp, *old_filter;
-	int fsize = sizeof(struct sock_filter) * fprog->len;
+	struct sk_filter *fp;
+	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
 	int err;

 	/* Make sure new filter is there and in the right amounts. */
-	if (fprog->filter == NULL || fprog->len == 0 || fsize > BPF_MAXINSNS)
+	if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
 		return (-EINVAL);

-	if ((err = sk_chk_filter(fprog->filter, fprog->len)) == 0)
-	{
-		/* If existing filter, remove it first */
-		if (sk->filter)
-		{
-			old_filter = sk->filter_data;
-			kfree_s(old_filter, (sizeof(old_filter) * sk->filter));
-			sk->filter_data = NULL;
-		}
-
-		fp = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL);
-		if (fp == NULL)
-			return (-ENOMEM);
-
-		memset(fp,0,sizeof(*fp));
-		memcpy(fp, fprog->filter, fsize);	/* Copy instructions */
-
-		sk->filter = fprog->len;	/* Number of filter blocks */
-		sk->filter_data = fp;		/* Filter instructions */
+	fp = (struct sk_filter *)sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	if (fp == NULL)
+		return (-ENOMEM);
+
+	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+		return -EFAULT;
 	}
+
+	atomic_set(&fp->refcnt, 1);
+	fp->len = fprog->len;
+
+	if ((err = sk_chk_filter(fp->insns, fp->len)) == 0) {
+		struct sk_filter *old_fp = sk->filter;
+		sk->filter = fp;
+		wmb();
+		fp = old_fp;
+	}
+
+	if (fp)
+		sk_filter_release(sk, fp);

 	return (err);
 }

 #endif /* CONFIG_FILTER */
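From user space, sk_attach_filter() is reached through setsockopt(). A minimal sketch reusing the hypothetical tcp_only program from above (fd is an already-open socket; error handling beyond perror elided):

	struct sock_fprog prog = {
		sizeof(tcp_only)/sizeof(tcp_only[0]),	/* len: filter blocks */
		tcp_only				/* filter: instructions */
	};

	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
		perror("SO_ATTACH_FILTER");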
...

@@ -155,10 +155,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	int err;
 	struct linger ling;
 	int ret = 0;
-
-#ifdef CONFIG_FILTER
-	struct sock_fprog fprog;
-#endif

 	/*
 	 * Options without arguments

@@ -256,12 +252,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 		case SO_PRIORITY:
 			if (val >= 0 && val <= 7)
+			{
+				if (val == 7 && !capable(CAP_NET_ADMIN))
+					return -EPERM;
 				sk->priority = val;
-			else
-				return (-EINVAL);
+			}
 			break;
 		case SO_LINGER:
 			if (optlen < sizeof(ling))
 				return -EINVAL;	/* 1003.1g */

@@ -310,10 +307,12 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			if (optlen > IFNAMSIZ)
 				optlen = IFNAMSIZ;
 			if (copy_from_user(devname, optval, optlen))
 				return -EFAULT;

 			/* Remove any cached route for this socket. */
+			lock_sock(sk);
 			dst_release(xchg(&sk->dst_cache, NULL));
+			release_sock(sk);

 			if (devname[0] == '\0') {
 				sk->bound_dev_if = 0;

@@ -331,30 +330,32 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 #ifdef CONFIG_FILTER
 		case SO_ATTACH_FILTER:
-			if (optlen < sizeof(struct sock_fprog))
-				return -EINVAL;
-
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-			{
-				ret = -EFAULT;
-				break;
-			}
-
-			ret = sk_attach_filter(&fprog, sk);
+			ret = -EINVAL;
+			if (optlen == sizeof(struct sock_fprog)) {
+				struct sock_fprog fprog;
+
+				ret = -EFAULT;
+				if (copy_from_user(&fprog, optval, sizeof(fprog)))
+					break;
+
+				ret = sk_attach_filter(&fprog, sk);
+			}
 			break;

 		case SO_DETACH_FILTER:
-			if (sk->filter)
-			{
-				fprog.filter = sk->filter_data;
-				kfree_s(fprog.filter, (sizeof(fprog.filter) * sk->filter));
-				sk->filter_data = NULL;
-				sk->filter = 0;
-				return 0;
-			}
-			else
-				return -EINVAL;
-			break;
+			if (sk->filter) {
+				struct sk_filter *filter;
+
+				filter = sk->filter;
+
+				sk->filter = NULL;
+				wmb();
+
+				if (filter)
+					sk_filter_release(sk, filter);
+				return 0;
+			}
+			return -ENOENT;
 #endif
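Detaching ignores the option value, and -ENOENT now distinguishes "nothing attached" from a bad argument. A sketch; the dummy int is there only because the generic sock_setsockopt() path reads an int-sized optval before dispatching (an assumption about the surrounding code, which this hunk does not show):

	int dummy = 0;	/* value unused by SO_DETACH_FILTER */

	if (setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, &dummy, sizeof(dummy)) < 0)
		perror("SO_DETACH_FILTER");	/* ENOENT: no filter was attached */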
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */

@@ -504,6 +505,16 @@ void sk_free(struct sock *sk)
 	if (sk->destruct)
 		sk->destruct(sk);

+#ifdef CONFIG_FILTER
+	if (sk->filter) {
+		sk_filter_release(sk, sk->filter);
+		sk->filter = NULL;
+	}
+#endif
+
+	if (atomic_read(&sk->omem_alloc))
+		printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
+
 	kmem_cache_free(sk_cachep, sk);
 }

...
@@ -1323,6 +1323,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		newsk->pair = NULL;
 		skb_queue_head_init(&newsk->back_log);
 		skb_queue_head_init(&newsk->error_queue);
+#ifdef CONFIG_FILTER
+		if (newsk->filter)
+			sk_filter_charge(newsk, newsk->filter);
+#endif
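sk_filter_charge() and sk_filter_release() are defined outside this diff (in include/net/sock.h). A sketch of their assumed shape, consistent with the refcnt and omem_alloc accounting visible in these hunks; sk_filter_len() is likewise an assumed helper, not shown by this patch:

	/* Assumed helpers -- a cloned socket shares the parent's program,
	 * so it must take a reference and be billed for the memory. */
	static inline unsigned int sk_filter_len(struct sk_filter *fp)
	{
		return fp->len * sizeof(struct sock_filter) + sizeof(*fp);
	}

	static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
	{
		atomic_inc(&fp->refcnt);			/* share the program */
		atomic_add(sk_filter_len(fp), &sk->omem_alloc);	/* bill this socket */
	}

	static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
	{
		unsigned int size = sk_filter_len(fp);

		atomic_sub(size, &sk->omem_alloc);	/* un-bill this socket */
		if (atomic_dec_and_test(&fp->refcnt))
			kfree_s(fp, size);		/* last user frees it */
	}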
 		/* Now setup tcp_opt */
 		newtp = &(newsk->tp_pinfo.af_tcp);

@@ -1553,12 +1557,10 @@ static inline struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
 #ifdef CONFIG_FILTER
-	if (sk->filter)
-	{
-		if (sk_filter(skb, sk->filter_data, sk->filter))
-			goto discard;
-	}
+	if (sk->filter && sk_filter(skb, sk->filter))
+		goto discard;
 #endif /* CONFIG_FILTER */
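The new call site hands the whole sk_filter object to an sk_filter() helper that also lives in include/net/sock.h, outside this diff. A plausible sketch only, assuming the convention that a non-zero return means "discard" and that the skb is trimmed to the length the program keeps:

	/* Assumption about the helper's shape, not part of this patch. */
	static inline int sk_filter(struct sk_buff *skb, struct sk_filter *filter)
	{
		int keep = sk_run_filter(skb, filter->insns, filter->len);

		if (keep == 0)
			return 1;		/* caller jumps to discard */
		if ((unsigned int)keep < skb->len)
			skb_trim(skb, keep);	/* keep only the prefix */
		return 0;
	}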
 	/*

...
@@ -30,6 +30,7 @@
  *		David S. Miller	:	Charge memory using the right skb
  *					during syn/ack processing.
  *		David S. Miller :	Output engine completely rewritten.
+ *		Andrea Arcangeli:	SYNACK carry ts_recent in tsecr.
  *
  */

@@ -135,7 +136,8 @@ void tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 					      (sysctl_flags & SYSCTL_FLAG_SACK),
 					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
 					      tp->rcv_wscale,
-					      TCP_SKB_CB(skb)->when);
+					      TCP_SKB_CB(skb)->when,
+					      tp->ts_recent);
 		} else {
 			tcp_build_and_update_options((__u32 *)(th + 1),
 						     tp, TCP_SKB_CB(skb)->when);

@@ -862,7 +864,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	TCP_SKB_CB(skb)->when = jiffies;
 	tcp_syn_build_options((__u32 *)(th + 1), req->mss, req->tstamp_ok,
 			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
-			      TCP_SKB_CB(skb)->when);
+			      TCP_SKB_CB(skb)->when,
+			      req->ts_recent);
 	skb->csum = 0;
 	th->doff = (tcp_header_size >> 2);

...
@@ -75,8 +75,7 @@ void net_timer (unsigned long data)
 	/* Only process if socket is not in use. */
 	if (atomic_read(&sk->sock_readers)) {
 		/* Try again later. */
-		sk->timer.expires = jiffies+HZ/20;
-		add_timer(&sk->timer);
+		mod_timer(&sk->timer, jiffies+HZ/20);
 		return;
 	}
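mod_timer() folds the expiry update and the re-arm into a single call that is safe even when the timer might still be pending; the open-coded sequence it subsumes was equivalent to:

	/* what mod_timer(&sk->timer, jiffies + HZ/20) replaces */
	sk->timer.expires = jiffies + HZ/20;	/* retry in 50 ms */
	add_timer(&sk->timer);			/* re-arm */

Here the timer has just fired, so the two-step form happened to be safe, but the one-call form cannot race against a concurrent expiry.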
...