Commit 5fcf9e32 authored by David S. Miller's avatar David S. Miller

Merge davem@nuts.davemloft.net:/disk1/BK/sparc-2.6

into kernel.bkbits.net:/home/davem/sparc-2.6
parents c3adda81 9d8e9a2d
......@@ -635,6 +635,7 @@ new_setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
regs->u_regs[UREG_FP] = (unsigned long) sf;
regs->u_regs[UREG_I0] = signo;
regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
regs->u_regs[UREG_I2] = (unsigned long) &sf->info;
/* 4. signal handler */
regs->pc = (unsigned long) ka->sa.sa_handler;
......@@ -719,6 +720,7 @@ new_setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
regs->u_regs[UREG_FP] = (unsigned long) sf;
regs->u_regs[UREG_I0] = signo;
regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
regs->u_regs[UREG_I2] = (unsigned long) &sf->regs;
regs->pc = (unsigned long) ka->sa.sa_handler;
regs->npc = (regs->pc + 4);
......
......@@ -56,6 +56,39 @@ static void __iommu_flushall(struct pci_iommu *iommu)
}
}
#define IOPTE_CONSISTENT(CTX) \
(IOPTE_VALID | IOPTE_CACHE | \
(((CTX) << 47) & IOPTE_CONTEXT))
#define IOPTE_STREAMING(CTX) \
(IOPTE_CONSISTENT(CTX) | IOPTE_STBUF)
/* Existing mappings are never marked invalid, instead they
* are pointed to a dummy page.
*/
#define IOPTE_IS_DUMMY(iommu, iopte) \
((iopte_val(*iopte) & IOPTE_PAGE) == (iommu)->dummy_page_pa)
static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte)
{
unsigned long val = iopte_val(*iopte);
val &= ~IOPTE_PAGE;
val |= iommu->dummy_page_pa;
iopte_val(*iopte) = val;
}
void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize)
{
int i;
tsbsize /= sizeof(iopte_t);
for (i = 0; i < tsbsize; i++)
iopte_make_dummy(iommu, &iommu->page_table[i]);
}
static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages)
{
iopte_t *iopte, *limit, *first;
......@@ -79,7 +112,7 @@ static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long n
first = iopte;
for (;;) {
if (iopte_val(*iopte) == 0UL) {
if (IOPTE_IS_DUMMY(iommu, iopte)) {
if ((iopte + (1 << cnum)) >= limit)
ent = 0;
else
......@@ -142,12 +175,12 @@ static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long
iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS));
while (iopte > iommu->page_table) {
iopte--;
if (!(iopte_val(*iopte) & IOPTE_VALID)) {
if (IOPTE_IS_DUMMY(iommu, iopte)) {
unsigned long tmp = npages;
while (--tmp) {
iopte--;
if (iopte_val(*iopte) & IOPTE_VALID)
if (!IOPTE_IS_DUMMY(iommu, iopte))
break;
}
if (tmp == 0) {
......@@ -162,15 +195,6 @@ static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long
return NULL;
}
#define IOPTE_CONSISTENT(CTX) \
(IOPTE_VALID | IOPTE_CACHE | \
(((CTX) << 47) & IOPTE_CONTEXT))
#define IOPTE_STREAMING(CTX) \
(IOPTE_CONSISTENT(CTX) | IOPTE_STBUF)
#define IOPTE_INVALID 0UL
/* Allocate and map kernel buffer of size SIZE using consistent mode
* DMA for PCI device PDEV. Return non-NULL cpu-side address if
* successful and set *DMA_ADDRP to the PCI side dma address.
......@@ -261,7 +285,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
limit = (iommu->page_table +
(1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
while (walk < limit) {
if (iopte_val(*walk) != IOPTE_INVALID)
if (!IOPTE_IS_DUMMY(iommu, walk))
break;
walk++;
}
......@@ -280,7 +304,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
for (i = 0; i < npages; i++, iopte++)
iopte_val(*iopte) = IOPTE_INVALID;
iopte_make_dummy(iommu, iopte);
if (iommu->iommu_ctxflush) {
pci_iommu_write(iommu->iommu_ctxflush, ctx);
......@@ -376,7 +400,7 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
base = iommu->page_table +
((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
#ifdef DEBUG_PCI_IOMMU
if (iopte_val(*base) == IOPTE_INVALID)
if (IOPTE_IS_DUMMY(iommu, base))
printk("pci_unmap_single called on non-mapped region %08x,%08x from %016lx\n",
bus_addr, sz, __builtin_return_address(0));
#endif
......@@ -415,7 +439,7 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
}
/* Step 2: Clear out first TSB entry. */
iopte_val(*base) = IOPTE_INVALID;
iopte_make_dummy(iommu, base);
free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
npages, ctx);
......@@ -611,7 +635,7 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
#ifdef DEBUG_PCI_IOMMU
if (iopte_val(*base) == IOPTE_INVALID)
if (IOPTE_IS_DUMMY(iommu, base))
printk("pci_unmap_sg called on non-mapped region %016lx,%d from %016lx\n", sglist->dma_address, nelems, __builtin_return_address(0));
#endif
......@@ -648,7 +672,7 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
}
/* Step 2: Clear out first TSB entry. */
iopte_val(*base) = IOPTE_INVALID;
iopte_make_dummy(iommu, base);
free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
npages, ctx);
......
......@@ -1241,6 +1241,14 @@ static void __init psycho_iommu_init(struct pci_controller_info *p)
* in pci_iommu.c
*/
iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
if (!iommu->dummy_page) {
prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
prom_halt();
}
memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
/* Using assumed page size 8K with 128K entries we need 1MB iommu page
* table (128K ioptes * 8 bytes per iopte). This is
* page order 7 on UltraSparc.
......@@ -1254,7 +1262,7 @@ static void __init psycho_iommu_init(struct pci_controller_info *p)
iommu->page_table_sz_bits = 17;
iommu->page_table_map_base = 0xc0000000;
iommu->dma_addr_mask = 0xffffffff;
memset((char *)tsbbase, 0, IO_TSB_SIZE);
pci_iommu_table_init(iommu, IO_TSB_SIZE);
/* We start with no consistent mappings. */
iommu->lowest_consistent_map =
......
......@@ -1293,6 +1293,14 @@ static void __init sabre_iommu_init(struct pci_controller_info *p,
* in pci_iommu.c
*/
iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
if (!iommu->dummy_page) {
prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
prom_halt();
}
memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
tsbbase = __get_free_pages(GFP_KERNEL, order = get_order(tsbsize * 1024 * 8));
if (!tsbbase) {
prom_printf("SABRE_IOMMU: Error, gfp(tsb) failed.\n");
......@@ -1301,7 +1309,7 @@ static void __init sabre_iommu_init(struct pci_controller_info *p,
iommu->page_table = (iopte_t *)tsbbase;
iommu->page_table_map_base = dvma_offset;
iommu->dma_addr_mask = dma_mask;
memset((char *)tsbbase, 0, PAGE_SIZE << order);
pci_iommu_table_init(iommu, PAGE_SIZE << order);
sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, __pa(tsbbase));
......
......@@ -1766,6 +1766,14 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
* in pci_iommu.c
*/
iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
if (!iommu->dummy_page) {
prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
prom_halt();
}
memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
/* Using assumed page size 8K with 128K entries we need 1MB iommu page
* table (128K ioptes * 8 bytes per iopte). This is
* page order 7 on UltraSparc.
......@@ -1780,7 +1788,7 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
iommu->page_table = (iopte_t *)tsbbase;
iommu->page_table_map_base = vdma[0];
iommu->dma_addr_mask = dma_mask;
memset((char *)tsbbase, 0, PAGE_SIZE << order);
pci_iommu_table_init(iommu, PAGE_SIZE << order);
switch (tsbsize) {
case 64:
......
......@@ -545,6 +545,12 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
regs->u_regs[UREG_I0] = signo;
regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
/* The sigcontext is passed in this way because of how it
* is defined in GLIBC's /usr/include/bits/sigcontext.h
* for sparc64. It includes the 128 bytes of siginfo_t.
*/
regs->u_regs[UREG_I2] = (unsigned long) &sf->info;
/* 5. signal handler */
regs->tpc = (unsigned long) ka->sa.sa_handler;
regs->tnpc = (regs->tpc + 4);
......
......@@ -749,6 +749,7 @@ static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
regs->u_regs[UREG_FP] = (unsigned long) sf;
regs->u_regs[UREG_I0] = signo;
regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
regs->u_regs[UREG_I2] = (unsigned long) &sf->info;
/* 4. signal handler */
regs->tpc = (unsigned long) ka->sa.sa_handler;
......@@ -1157,6 +1158,7 @@ static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
regs->u_regs[UREG_FP] = (unsigned long) sf;
regs->u_regs[UREG_I0] = signr;
regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
regs->u_regs[UREG_I2] = (unsigned long) &sf->regs;
/* 4. signal handler */
regs->tpc = (unsigned long) ka->sa.sa_handler;
......
......@@ -74,8 +74,6 @@ extern void *__bzero(void *, size_t);
extern void *__memscan_zero(void *, size_t);
extern void *__memscan_generic(void *, int, size_t);
extern int __memcmp(const void *, const void *, __kernel_size_t);
extern int __strncmp(const char *, const char *, __kernel_size_t);
extern __kernel_size_t __strlen(const char *);
extern __kernel_size_t strlen(const char *);
extern void linux_sparc_syscall(void);
extern void rtrap(void);
......@@ -187,8 +185,6 @@ EXPORT_SYMBOL(atomic_dec_and_lock);
EXPORT_SYMBOL(___test_and_set_bit);
EXPORT_SYMBOL(___test_and_clear_bit);
EXPORT_SYMBOL(___test_and_change_bit);
EXPORT_SYMBOL(___test_and_set_le_bit);
EXPORT_SYMBOL(___test_and_clear_le_bit);
/* Bit searching */
EXPORT_SYMBOL(find_next_bit);
......@@ -295,7 +291,6 @@ EXPORT_SYMBOL(__prom_getchild);
EXPORT_SYMBOL(__prom_getsibling);
/* sparc library symbols */
EXPORT_SYMBOL(__strlen);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(__strlen_user);
......@@ -334,7 +329,6 @@ EXPORT_SYMBOL(sys_close);
#endif
/* Special internal versions of library functions. */
EXPORT_SYMBOL(__memset);
EXPORT_SYMBOL(_clear_page);
EXPORT_SYMBOL(clear_user_page);
EXPORT_SYMBOL(copy_user_page);
......@@ -342,8 +336,7 @@ EXPORT_SYMBOL(__bzero);
EXPORT_SYMBOL(__memscan_zero);
EXPORT_SYMBOL(__memscan_generic);
EXPORT_SYMBOL(__memcmp);
EXPORT_SYMBOL(__strncmp);
EXPORT_SYMBOL(__memmove);
EXPORT_SYMBOL(__memset);
EXPORT_SYMBOL(memchr);
EXPORT_SYMBOL(csum_partial);
......@@ -351,9 +344,12 @@ EXPORT_SYMBOL(csum_partial_copy_sparc64);
EXPORT_SYMBOL(ip_fast_csum);
/* Moving data to/from/in userspace. */
EXPORT_SYMBOL(__copy_to_user);
EXPORT_SYMBOL(__copy_from_user);
EXPORT_SYMBOL(__copy_in_user);
EXPORT_SYMBOL(___copy_to_user);
EXPORT_SYMBOL(___copy_from_user);
EXPORT_SYMBOL(___copy_in_user);
EXPORT_SYMBOL(copy_to_user_fixup);
EXPORT_SYMBOL(copy_from_user_fixup);
EXPORT_SYMBOL(copy_in_user_fixup);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__bzero_noasi);
......@@ -374,6 +370,7 @@ EXPORT_SYMBOL_NOVERS(memcmp);
EXPORT_SYMBOL_NOVERS(memcpy);
EXPORT_SYMBOL_NOVERS(memset);
EXPORT_SYMBOL_NOVERS(memmove);
EXPORT_SYMBOL_NOVERS(strncmp);
void VISenter(void);
/* RAID code needs this */
......
......@@ -7,11 +7,12 @@ EXTRA_CFLAGS := -Werror
lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \
VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \
VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \
VIScsumcopyusr.o VISsave.o atomic.o rwlock.o bitops.o \
U3memcpy.o U3copy_from_user.o U3copy_to_user.o \
U3copy_in_user.o mcount.o ipcsum.o rwsem.o xor.o splock.o \
find_bit.o
U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
copy_in_user.o user_fixup.o memmove.o \
mcount.o ipcsum.o rwsem.o xor.o splock.o find_bit.o
lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o
lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
/* U1copy_from_user.S: UltraSparc-I/II/IIi/IIe optimized copy from userspace.
*
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
#define EX_LD(x) \
98: x; \
.section .fixup; \
.align 4; \
99: retl; \
mov 1, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define FUNC_NAME ___copy_from_user
#define LOAD(type,addr,dest) type##a [addr] %asi, dest
#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS, dest
#define EX_RETVAL(x) 0
/* Writing to %asi is _expensive_ so we hardcode it.
* Reading %asi to check for KERNEL_DS is comparatively
* cheap.
*/
#define PREAMBLE \
rd %asi, %g1; \
cmp %g1, ASI_AIUS; \
bne,pn %icc, memcpy_user_stub; \
nop; \
#include "U1memcpy.S"
/* U1copy_to_user.S: UltraSparc-I/II/IIi/IIe optimized copy to userspace.
*
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
#define EX_ST(x) \
98: x; \
.section .fixup; \
.align 4; \
99: retl; \
mov 1, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define FUNC_NAME ___copy_to_user
#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS
#define EX_RETVAL(x) 0
/* Writing to %asi is _expensive_ so we hardcode it.
* Reading %asi to check for KERNEL_DS is comparatively
* cheap.
*/
#define PREAMBLE \
rd %asi, %g1; \
cmp %g1, ASI_AIUS; \
bne,pn %icc, memcpy_user_stub; \
nop; \
#include "U1memcpy.S"
This diff is collapsed.
......@@ -3,410 +3,20 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#define XCC xcc
#define EXNV_RAW(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: ba U3cfu_fixup; \
a, b, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: add %o1, %o3, %o0; \
ba U3cfu_fixup; \
a, b, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV4(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: add %o1, %o3, %o0; \
a, b, %o1; \
ba U3cfu_fixup; \
add %o1, 4, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV8(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: add %o1, %o3, %o0; \
a, b, %o1; \
ba U3cfu_fixup; \
add %o1, 8, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EX(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
ba U3cfu_fixup; \
a, b, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EX2(x,y) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
and %o2, (0x40 - 1), %o1; \
add %o1, %o4, %o1; \
ba U3cfu_fixup; \
add %o1, 0x1c0, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EX3(x,y) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
and %o2, (0x40 - 1), %o1; \
sll %g3, 6, %g3; \
add %o1, 0x80, %o1; \
ba U3cfu_fixup; \
add %o1, %g3, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EX4(x,y) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
and %o2, (0x40 - 1), %o1; \
add %o1, 0x40, %o1; \
ba U3cfu_fixup; \
add %o1, %g3, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
.register %g2,#scratch
.register %g3,#scratch
/* Special/non-trivial issues of this code:
*
* 1) %o5 is preserved from VISEntryHalf to VISExitHalf
* 2) Only low 32 FPU registers are used so that only the
* lower half of the FPU register set is dirtied by this
* code. This is especially important in the kernel.
* 3) This code never prefetches cachelines past the end
* of the source buffer.
*/
.text
.align 32
/* The cheetah's flexible spine, oversized liver, enlarged heart,
* slender muscular body, and claws make it the swiftest hunter
* in Africa and the fastest animal on land. Can reach speeds
* of up to 2.4GB per second.
*/
.globl U3copy_from_user
U3copy_from_user: /* %o0=dst, %o1=src, %o2=len */
cmp %o2, 0
be,pn %XCC, 85f
or %o0, %o1, %o3
cmp %o2, 16
bleu,a,pn %XCC, 80f
or %o3, %o2, %o3
cmp %o2, 256
blu,pt %XCC, 70f
andcc %o3, 0x7, %g0
ba,pt %xcc, 1f
andcc %o0, 0x3f, %g2
/* Here len >= 256 and condition codes reflect execution
* of "andcc %o0, 0x7, %g2", done by caller.
*/
.align 64
1:
/* Is 'dst' already aligned on an 64-byte boundary? */
be,pt %XCC, 2f
/* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
* of bytes to copy to make 'dst' 64-byte aligned. We pre-
* subtract this from 'len'.
*/
sub %g2, 0x40, %g2
sub %g0, %g2, %g2
sub %o2, %g2, %o2
/* Copy %g2 bytes from src to dst, one byte at a time. */
1: EXNV_RAW(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g2)
add %o1, 0x1, %o1
add %o0, 0x1, %o0
subcc %g2, 0x1, %g2
bg,pt %XCC, 1b
stb %o3, [%o0 + -1]
2: VISEntryHalf
and %o1, 0x7, %g1
ba,pt %xcc, 1f
alignaddr %o1, %g0, %o1
.align 64
1:
membar #StoreLoad | #StoreStore | #LoadStore
prefetcha [%o1 + 0x000] %asi, #one_read
prefetcha [%o1 + 0x040] %asi, #one_read
andn %o2, (0x40 - 1), %o4
prefetcha [%o1 + 0x080] %asi, #one_read
prefetcha [%o1 + 0x0c0] %asi, #one_read
EX(ldda [%o1 + 0x000] %asi, %f0, add %o2, %g0)
prefetcha [%o1 + 0x100] %asi, #one_read
EX(ldda [%o1 + 0x008] %asi, %f2, add %o2, %g0)
prefetcha [%o1 + 0x140] %asi, #one_read
EX(ldda [%o1 + 0x010] %asi, %f4, add %o2, %g0)
prefetcha [%o1 + 0x180] %asi, #one_read
faligndata %f0, %f2, %f16
EX(ldda [%o1 + 0x018] %asi, %f6, add %o2, %g0)
faligndata %f2, %f4, %f18
EX(ldda [%o1 + 0x020] %asi, %f8, add %o2, %g0)
faligndata %f4, %f6, %f20
EX(ldda [%o1 + 0x028] %asi, %f10, add %o2, %g0)
faligndata %f6, %f8, %f22
EX(ldda [%o1 + 0x030] %asi, %f12, add %o2, %g0)
faligndata %f8, %f10, %f24
EX(ldda [%o1 + 0x038] %asi, %f14, add %o2, %g0)
faligndata %f10, %f12, %f26
EX(ldda [%o1 + 0x040] %asi, %f0, add %o2, %g0)
sub %o4, 0x80, %o4
add %o1, 0x40, %o1
ba,pt %xcc, 1f
srl %o4, 6, %o3
.align 64
1:
EX3(ldda [%o1 + 0x008] %asi, %f2)
faligndata %f12, %f14, %f28
EX3(ldda [%o1 + 0x010] %asi, %f4)
faligndata %f14, %f0, %f30
stda %f16, [%o0] ASI_BLK_P
EX3(ldda [%o1 + 0x018] %asi, %f6)
faligndata %f0, %f2, %f16
EX3(ldda [%o1 + 0x020] %asi, %f8)
faligndata %f2, %f4, %f18
EX3(ldda [%o1 + 0x028] %asi, %f10)
faligndata %f4, %f6, %f20
EX3(ldda [%o1 + 0x030] %asi, %f12)
faligndata %f6, %f8, %f22
EX3(ldda [%o1 + 0x038] %asi, %f14)
faligndata %f8, %f10, %f24
EX3(ldda [%o1 + 0x040] %asi, %f0)
prefetcha [%o1 + 0x180] %asi, #one_read
faligndata %f10, %f12, %f26
subcc %o3, 0x01, %o3
add %o1, 0x40, %o1
bg,pt %XCC, 1b
add %o0, 0x40, %o0
/* Finally we copy the last full 64-byte block. */
EX3(ldda [%o1 + 0x008] %asi, %f2)
faligndata %f12, %f14, %f28
EX3(ldda [%o1 + 0x010] %asi, %f4)
faligndata %f14, %f0, %f30
stda %f16, [%o0] ASI_BLK_P
EX3(ldda [%o1 + 0x018] %asi, %f6)
faligndata %f0, %f2, %f16
EX3(ldda [%o1 + 0x020] %asi, %f8)
faligndata %f2, %f4, %f18
EX3(ldda [%o1 + 0x028] %asi, %f10)
faligndata %f4, %f6, %f20
EX3(ldda [%o1 + 0x030] %asi, %f12)
faligndata %f6, %f8, %f22
EX3(ldda [%o1 + 0x038] %asi, %f14)
faligndata %f8, %f10, %f24
cmp %g1, 0
be,pt %XCC, 1f
add %o0, 0x40, %o0
EX4(ldda [%o1 + 0x040] %asi, %f0)
1: faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30
stda %f16, [%o0] ASI_BLK_P
add %o0, 0x40, %o0
add %o1, 0x40, %o1
membar #Sync
/* Now we copy the (len modulo 64) bytes at the end.
* Note how we borrow the %f0 loaded above.
*
* Also notice how this code is careful not to perform a
* load past the end of the src buffer.
*/
and %o2, 0x3f, %o2
andcc %o2, 0x38, %g2
be,pn %XCC, 10f
subcc %g2, 0x8, %g2
be,pn %XCC, 10f
cmp %g1, 0
be,a,pt %XCC, 1f
EX(ldda [%o1 + 0x00] %asi, %f0, add %o2, %g0)
1: EX(ldda [%o1 + 0x08] %asi, %f2, add %o2, %g0)
add %o1, 0x8, %o1
sub %o2, 0x8, %o2
subcc %g2, 0x8, %g2
faligndata %f0, %f2, %f8
std %f8, [%o0 + 0x00]
be,pn %XCC, 10f
add %o0, 0x8, %o0
EX(ldda [%o1 + 0x08] %asi, %f0, add %o2, %g0)
add %o1, 0x8, %o1
sub %o2, 0x8, %o2
subcc %g2, 0x8, %g2
faligndata %f2, %f0, %f8
std %f8, [%o0 + 0x00]
bne,pn %XCC, 1b
add %o0, 0x8, %o0
/* If anything is left, we copy it one byte at a time.
* Note that %g1 is (src & 0x3) saved above before the
* alignaddr was performed.
*/
10:
cmp %o2, 0
add %o1, %g1, %o1
VISExitHalf
be,pn %XCC, 85f
sub %o0, %o1, %o3
andcc %g1, 0x7, %g0
bne,pn %icc, 90f
andcc %o2, 0x8, %g0
be,pt %icc, 1f
nop
EXNV(ldxa [%o1] %asi, %o5, add %o2, %g0)
stx %o5, [%o1 + %o3]
add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0
be,pt %icc, 1f
nop
EXNV(lduwa [%o1] %asi, %o5, and %o2, 0x7)
stw %o5, [%o1 + %o3]
add %o1, 0x4, %o1
1: andcc %o2, 0x2, %g0
be,pt %icc, 1f
nop
EXNV(lduha [%o1] %asi, %o5, and %o2, 0x3)
sth %o5, [%o1 + %o3]
add %o1, 0x2, %o1
1: andcc %o2, 0x1, %g0
be,pt %icc, 85f
nop
EXNV(lduba [%o1] %asi, %o5, and %o2, 0x1)
ba,pt %xcc, 85f
stb %o5, [%o1 + %o3]
70: /* 16 < len <= 64 */
bne,pn %XCC, 90f
sub %o0, %o1, %o3
andn %o2, 0x7, %o4
and %o2, 0x7, %o2
1: subcc %o4, 0x8, %o4
EXNV8(ldxa [%o1] %asi, %o5, add %o2, %o4)
stx %o5, [%o1 + %o3]
bgu,pt %XCC, 1b
add %o1, 0x8, %o1
andcc %o2, 0x4, %g0
be,pt %XCC, 1f
nop
sub %o2, 0x4, %o2
EXNV4(lduwa [%o1] %asi, %o5, add %o2, %g0)
stw %o5, [%o1 + %o3]
add %o1, 0x4, %o1
1: cmp %o2, 0
be,pt %XCC, 85f
nop
ba,pt %xcc, 90f
nop
80: /* 0 < len <= 16 */
andcc %o3, 0x3, %g0
bne,pn %XCC, 90f
sub %o0, %o1, %o3
1:
subcc %o2, 4, %o2
EXNV(lduwa [%o1] %asi, %g1, add %o2, %g0)
stw %g1, [%o1 + %o3]
bgu,pt %XCC, 1b
add %o1, 4, %o1
85: retl
clr %o0
.align 32
90:
subcc %o2, 1, %o2
EXNV(lduba [%o1] %asi, %g1, add %o2, %g0)
stb %g1, [%o1 + %o3]
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
clr %o0
U3cfu_fixup:
/* Since this is copy_from_user(), zero out the rest of the
* kernel buffer.
*/
cmp %o1, 0
ble,pn %icc, 2f
mov %o1, %g2
1: subcc %g2, 1, %g2
stb %g0, [%o0]
bne,pt %icc, 1b
add %o0, 1, %o0
2: retl
mov %o1, %o0
#define EX_LD(x) \
98: x; \
.section .fixup; \
.align 4; \
99: retl; \
mov 1, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define FUNC_NAME U3copy_from_user
#define LOAD(type,addr,dest) type##a [addr] %asi, dest
#define EX_RETVAL(x) 0
#include "U3memcpy.S"
/* U3copy_to_user.S: UltraSparc-III optimized memcpy.
/* U3copy_to_user.S: UltraSparc-III optimized copy to userspace.
*
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#define XCC xcc
#define EXNV(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: retl; \
a, b, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV2(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: a, b, %o0; \
retl; \
add %o0, 1, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV3(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: a, b, %o0; \
retl; \
add %o0, 4, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV4(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: a, b, %o0; \
retl; \
add %o0, 8, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EX(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
retl; \
a, b, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
#define EX_ST(x) \
98: x; \
.section .fixup; \
.align 4; \
99: retl; \
mov 1, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXBLK1(x,y) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
add %o4, 0x1c0, %o1; \
and %o2, (0x40 - 1), %o2; \
retl; \
add %o1, %o2, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXBLK2(x,y) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
sll %o3, 6, %o3; \
and %o2, (0x40 - 1), %o2; \
add %o3, 0x80, %o1; \
retl; \
add %o1, %o2, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXBLK3(x,y) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
and %o2, (0x40 - 1), %o2; \
retl; \
add %o2, 0x80, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXBLK4(x,y) \
98: x,y; \
.section .fixup; \
.align 4; \
99: VISExitHalf; \
and %o2, (0x40 - 1), %o2; \
retl; \
add %o2, 0x40, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
.register %g2,#scratch
.register %g3,#scratch
/* Special/non-trivial issues of this code:
*
* 1) %o5 is preserved from VISEntryHalf to VISExitHalf
* 2) Only low 32 FPU registers are used so that only the
* lower half of the FPU register set is dirtied by this
* code. This is especially important in the kernel.
* 3) This code never prefetches cachelines past the end
* of the source buffer.
*/
.text
.align 32
/* The cheetah's flexible spine, oversized liver, enlarged heart,
* slender muscular body, and claws make it the swiftest hunter
* in Africa and the fastest animal on land. Can reach speeds
* of up to 2.4GB per second.
*/
#define FUNC_NAME U3copy_to_user
#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS
#define EX_RETVAL(x) 0
.globl U3copy_to_user
U3copy_to_user: /* %o0=dst, %o1=src, %o2=len */
/* Writing to %asi is _expensive_ so we hardcode it.
* Reading %asi to check for KERNEL_DS is comparatively
* cheap.
*/
rd %asi, %g1
cmp %g1, ASI_AIUS
bne,pn %icc, U3memcpy_user_stub
nop
cmp %o2, 0
be,pn %XCC, 85f
or %o0, %o1, %o3
cmp %o2, 16
bleu,a,pn %XCC, 80f
or %o3, %o2, %o3
cmp %o2, 256
blu,pt %XCC, 70f
andcc %o3, 0x7, %g0
ba,pt %xcc, 1f
andcc %o0, 0x3f, %g2
/* Here len >= 256 and condition codes reflect execution
* of "andcc %o0, 0x7, %g2", done by caller.
*/
.align 64
1:
/* Is 'dst' already aligned on an 64-byte boundary? */
be,pt %XCC, 2f
/* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
* of bytes to copy to make 'dst' 64-byte aligned. We pre-
* subtract this from 'len'.
*/
sub %g2, 0x40, %g2
sub %g0, %g2, %g2
sub %o2, %g2, %o2
/* Copy %g2 bytes from src to dst, one byte at a time. */
1: ldub [%o1 + 0x00], %o3
add %o1, 0x1, %o1
add %o0, 0x1, %o0
subcc %g2, 0x1, %g2
bg,pt %XCC, 1b
EXNV2(stba %o3, [%o0 + -1] %asi, add %o2, %g2)
2: VISEntryHalf
and %o1, 0x7, %g1
ba,pt %xcc, 1f
alignaddr %o1, %g0, %o1
.align 64
1:
membar #StoreLoad | #StoreStore | #LoadStore
prefetch [%o1 + 0x000], #one_read
prefetch [%o1 + 0x040], #one_read
andn %o2, (0x40 - 1), %o4
prefetch [%o1 + 0x080], #one_read
prefetch [%o1 + 0x0c0], #one_read
ldd [%o1 + 0x000], %f0
prefetch [%o1 + 0x100], #one_read
ldd [%o1 + 0x008], %f2
prefetch [%o1 + 0x140], #one_read
ldd [%o1 + 0x010], %f4
prefetch [%o1 + 0x180], #one_read
faligndata %f0, %f2, %f16
ldd [%o1 + 0x018], %f6
faligndata %f2, %f4, %f18
ldd [%o1 + 0x020], %f8
faligndata %f4, %f6, %f20
ldd [%o1 + 0x028], %f10
faligndata %f6, %f8, %f22
ldd [%o1 + 0x030], %f12
faligndata %f8, %f10, %f24
ldd [%o1 + 0x038], %f14
faligndata %f10, %f12, %f26
ldd [%o1 + 0x040], %f0
sub %o4, 0x80, %o4
add %o1, 0x40, %o1
ba,pt %xcc, 1f
srl %o4, 6, %o3
.align 64
1:
ldd [%o1 + 0x008], %f2
faligndata %f12, %f14, %f28
ldd [%o1 + 0x010], %f4
faligndata %f14, %f0, %f30
EXBLK2(stda %f16, [%o0] ASI_BLK_AIUS)
ldd [%o1 + 0x018], %f6
faligndata %f0, %f2, %f16
ldd [%o1 + 0x020], %f8
faligndata %f2, %f4, %f18
ldd [%o1 + 0x028], %f10
faligndata %f4, %f6, %f20
ldd [%o1 + 0x030], %f12
faligndata %f6, %f8, %f22
ldd [%o1 + 0x038], %f14
faligndata %f8, %f10, %f24
ldd [%o1 + 0x040], %f0
prefetch [%o1 + 0x180], #one_read
faligndata %f10, %f12, %f26
subcc %o3, 0x01, %o3
add %o1, 0x40, %o1
bg,pt %XCC, 1b
add %o0, 0x40, %o0
/* Finally we copy the last full 64-byte block. */
ldd [%o1 + 0x008], %f2
faligndata %f12, %f14, %f28
ldd [%o1 + 0x010], %f4
faligndata %f14, %f0, %f30
EXBLK3(stda %f16, [%o0] ASI_BLK_AIUS)
ldd [%o1 + 0x018], %f6
faligndata %f0, %f2, %f16
ldd [%o1 + 0x020], %f8
faligndata %f2, %f4, %f18
ldd [%o1 + 0x028], %f10
faligndata %f4, %f6, %f20
ldd [%o1 + 0x030], %f12
faligndata %f6, %f8, %f22
ldd [%o1 + 0x038], %f14
faligndata %f8, %f10, %f24
cmp %g1, 0
be,pt %XCC, 1f
add %o0, 0x40, %o0
ldd [%o1 + 0x040], %f0
1: faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30
EXBLK4(stda %f16, [%o0] ASI_BLK_AIUS)
add %o0, 0x40, %o0
add %o1, 0x40, %o1
membar #Sync
/* Now we copy the (len modulo 64) bytes at the end.
* Note how we borrow the %f0 loaded above.
*
* Also notice how this code is careful not to perform a
* load past the end of the src buffer.
*/
and %o2, 0x3f, %o2
andcc %o2, 0x38, %g2
be,pn %XCC, 2f
subcc %g2, 0x8, %g2
be,pn %XCC, 2f
cmp %g1, 0
be,a,pt %XCC, 1f
ldd [%o1 + 0x00], %f0
1: ldd [%o1 + 0x08], %f2
add %o1, 0x8, %o1
sub %o2, 0x8, %o2
subcc %g2, 0x8, %g2
faligndata %f0, %f2, %f8
EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8)
be,pn %XCC, 2f
add %o0, 0x8, %o0
ldd [%o1 + 0x08], %f0
add %o1, 0x8, %o1
sub %o2, 0x8, %o2
subcc %g2, 0x8, %g2
faligndata %f2, %f0, %f8
EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8)
bne,pn %XCC, 1b
add %o0, 0x8, %o0
/* If anything is left, we copy it one byte at a time.
* Note that %g1 is (src & 0x3) saved above before the
* alignaddr was performed.
*/
2:
cmp %o2, 0
add %o1, %g1, %o1
VISExitHalf
be,pn %XCC, 85f
sub %o0, %o1, %o3
andcc %g1, 0x7, %g0
bne,pn %icc, 90f
andcc %o2, 0x8, %g0
be,pt %icc, 1f
nop
ldx [%o1], %o5
EXNV(stxa %o5, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0
be,pt %icc, 1f
nop
lduw [%o1], %o5
EXNV(stwa %o5, [%o1 + %o3] ASI_AIUS, and %o2, 0x7)
add %o1, 0x4, %o1
1: andcc %o2, 0x2, %g0
be,pt %icc, 1f
nop
lduh [%o1], %o5
EXNV(stha %o5, [%o1 + %o3] ASI_AIUS, and %o2, 0x3)
add %o1, 0x2, %o1
1: andcc %o2, 0x1, %g0
be,pt %icc, 85f
nop
ldub [%o1], %o5
ba,pt %xcc, 85f
EXNV(stba %o5, [%o1 + %o3] ASI_AIUS, and %o2, 0x1)
70: /* 16 < len <= 64 */
bne,pn %XCC, 90f
sub %o0, %o1, %o3
andn %o2, 0x7, %o4
and %o2, 0x7, %o2
1: subcc %o4, 0x8, %o4
ldx [%o1], %o5
EXNV4(stxa %o5, [%o1 + %o3] ASI_AIUS, add %o2, %o4)
bgu,pt %XCC, 1b
add %o1, 0x8, %o1
andcc %o2, 0x4, %g0
be,pt %XCC, 1f
nop
sub %o2, 0x4, %o2
lduw [%o1], %o5
EXNV3(stwa %o5, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
add %o1, 0x4, %o1
1: cmp %o2, 0
be,pt %XCC, 85f
nop
ba,pt %xcc, 90f
nop
80: /* 0 < len <= 16 */
andcc %o3, 0x3, %g0
bne,pn %XCC, 90f
sub %o0, %o1, %o3
1:
subcc %o2, 4, %o2
lduw [%o1], %g1
EXNV3(stwa %g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
bgu,pt %XCC, 1b
add %o1, 4, %o1
85: retl
clr %o0
#define PREAMBLE \
rd %asi, %g1; \
cmp %g1, ASI_AIUS; \
bne,pn %icc, memcpy_user_stub; \
nop; \
.align 32
90:
subcc %o2, 1, %o2
ldub [%o1], %g1
EXNV2(stba %g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
clr %o0
#include "U3memcpy.S"
This diff is collapsed.
/* U3patch.S: Patch Ultra-I routines with Ultra-III variant.
*
* Copyright (C) 2004 David S. Miller <davem@redhat.com>
*/
#define BRANCH_ALWAYS 0x10680000
#define NOP 0x01000000
#define ULTRA3_DO_PATCH(OLD, NEW) \
sethi %hi(NEW), %g1; \
or %g1, %lo(NEW), %g1; \
sethi %hi(OLD), %g2; \
or %g2, %lo(OLD), %g2; \
sub %g1, %g2, %g1; \
sethi %hi(BRANCH_ALWAYS), %g3; \
srl %g1, 2, %g1; \
or %g3, %lo(BRANCH_ALWAYS), %g3; \
or %g3, %g1, %g3; \
stw %g3, [%g2]; \
sethi %hi(NOP), %g3; \
or %g3, %lo(NOP), %g3; \
stw %g3, [%g2 + 0x4]; \
flush %g2;
.globl cheetah_patch_copyops
cheetah_patch_copyops:
ULTRA3_DO_PATCH(memcpy, U3memcpy)
ULTRA3_DO_PATCH(___copy_from_user, U3copy_from_user)
ULTRA3_DO_PATCH(___copy_to_user, U3copy_to_user)
retl
nop
This diff is collapsed.
......@@ -64,41 +64,3 @@ ___test_and_change_bit: /* %o0=nr, %o1=addr */
2: retl
membar #StoreLoad | #StoreStore
nop
.globl ___test_and_set_le_bit
___test_and_set_le_bit: /* %o0=nr, %o1=addr */
srlx %o0, 5, %g1
mov 1, %g5
sllx %g1, 2, %g3
and %o0, 31, %g2
sllx %g5, %g2, %g5
add %o1, %g3, %o1
lduwa [%o1] ASI_PL, %g7
1: andcc %g7, %g5, %o0
bne,pn %icc, 2f
xor %g7, %g5, %g1
casa [%o1] ASI_PL, %g7, %g1
cmp %g7, %g1
bne,a,pn %icc, 1b
lduwa [%o1] ASI_PL, %g7
2: retl
membar #StoreLoad | #StoreStore
.globl ___test_and_clear_le_bit
___test_and_clear_le_bit: /* %o0=nr, %o1=addr */
srlx %o0, 5, %g1
mov 1, %g5
sllx %g1, 2, %g3
and %o0, 31, %g2
sllx %g5, %g2, %g5
add %o1, %g3, %o1
lduwa [%o1] ASI_PL, %g7
1: andcc %g7, %g5, %o0
be,pn %icc, 2f
xor %g7, %g5, %g1
casa [%o1] ASI_PL, %g7, %g1
cmp %g7, %g1
bne,a,pn %icc, 1b
lduwa [%o1] ASI_PL, %g7
2: retl
membar #StoreLoad | #StoreStore
/* U3copy_in_user.S: UltraSparc-III optimized memcpy.
/* copy_in_user.S: Copy from userspace to userspace.
*
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
#define XCC xcc
#define EXNV(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: retl; \
a, b, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV1(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: a, b, %o0; \
retl; \
add %o0, 1, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV4(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: a, b, %o0; \
retl; \
add %o0, 4, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
#define EXNV8(x,y,a,b) \
98: x,y; \
.section .fixup; \
.align 4; \
99: a, b, %o0; \
retl; \
add %o0, 8, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
#define EX(x,y) \
98: x,y; \
.section .fixup; \
.align 4; \
99: retl; \
mov 1, %o0; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
.text; \
.align 4;
.register %g2,#scratch
......@@ -70,71 +31,84 @@
* to copy register windows around during thread cloning.
*/
.globl U3copy_in_user
U3copy_in_user: /* %o0=dst, %o1=src, %o2=len */
.globl ___copy_in_user
___copy_in_user: /* %o0=dst, %o1=src, %o2=len */
/* Writing to %asi is _expensive_ so we hardcode it.
* Reading %asi to check for KERNEL_DS is comparatively
* cheap.
*/
rd %asi, %g1
cmp %g1, ASI_AIUS
bne,pn %icc, U3memcpy_user_stub
bne,pn %icc, memcpy_user_stub
nop
cmp %o2, 0
be,pn %XCC, out
be,pn %XCC, 85f
or %o0, %o1, %o3
cmp %o2, 16
bleu,a,pn %XCC, small_copy
bleu,a,pn %XCC, 80f
or %o3, %o2, %o3
medium_copy: /* 16 < len <= 64 */
/* 16 < len <= 64 */
andcc %o3, 0x7, %g0
bne,pn %XCC, small_copy_unaligned
bne,pn %XCC, 90f
sub %o0, %o1, %o3
medium_copy_aligned:
andn %o2, 0x7, %o4
and %o2, 0x7, %o2
1: subcc %o4, 0x8, %o4
EXNV8(ldxa [%o1] %asi, %o5, add %o4, %o2)
EXNV8(stxa %o5, [%o1 + %o3] ASI_AIUS, add %o4, %o2)
EX(ldxa [%o1] %asi, %o5)
EX(stxa %o5, [%o1 + %o3] ASI_AIUS)
bgu,pt %XCC, 1b
add %o1, 0x8, %o1
andcc %o2, 0x4, %g0
be,pt %XCC, 1f
nop
sub %o2, 0x4, %o2
EXNV4(lduwa [%o1] %asi, %o5, add %o4, %o2)
EXNV4(stwa %o5, [%o1 + %o3] ASI_AIUS, add %o4, %o2)
EX(lduwa [%o1] %asi, %o5)
EX(stwa %o5, [%o1 + %o3] ASI_AIUS)
add %o1, 0x4, %o1
1: cmp %o2, 0
be,pt %XCC, out
be,pt %XCC, 85f
nop
ba,pt %xcc, small_copy_unaligned
ba,pt %xcc, 90f
nop
small_copy: /* 0 < len <= 16 */
80: /* 0 < len <= 16 */
andcc %o3, 0x3, %g0
bne,pn %XCC, small_copy_unaligned
bne,pn %XCC, 90f
sub %o0, %o1, %o3
small_copy_aligned:
82:
subcc %o2, 4, %o2
EXNV4(lduwa [%o1] %asi, %g1, add %o2, %g0)
EXNV4(stwa %g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
bgu,pt %XCC, small_copy_aligned
EX(lduwa [%o1] %asi, %g1)
EX(stwa %g1, [%o1 + %o3] ASI_AIUS)
bgu,pt %XCC, 82b
add %o1, 4, %o1
out: retl
85: retl
clr %o0
.align 32
small_copy_unaligned:
90:
subcc %o2, 1, %o2
EXNV1(lduba [%o1] %asi, %g1, add %o2, %g0)
EXNV1(stba %g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
bgu,pt %XCC, small_copy_unaligned
EX(lduba [%o1] %asi, %g1)
EX(stba %g1, [%o1 + %o3] ASI_AIUS)
bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
clr %o0
/* Act like copy_{to,in}_user(), ie. return zero instead
* of original destination pointer. This is invoked when
* copy_{to,in}_user() finds that %asi is kernel space.
*/
.globl memcpy_user_stub
memcpy_user_stub:
save %sp, -192, %sp
mov %i0, %o0
mov %i1, %o1
call memcpy
mov %i2, %o2
ret
restore %g0, %g0, %o0
/* memmove.S: Simple memmove implementation.
*
* Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
* Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
*/
.text
.align 32
.globl memmove
memmove:
mov %o0, %g1
cmp %o0, %o1
blu,pt %xcc, memcpy
sub %o0, %o1, %g5
add %o1, %o2, %g3
cmp %g3, %o0
bleu,pt %xcc, memcpy
add %o1, %o2, %g5
add %o0, %o2, %o5
sub %g5, 1, %o1
sub %o5, 1, %o0
1: ldub [%o1], %g5
subcc %o2, 1, %o2
sub %o1, 1, %o1
stb %g5, [%o0]
bne,pt %icc, 1b
sub %o0, 1, %o0
retl
mov %g1, %o0
......@@ -9,9 +9,8 @@
#define HI_MAGIC 0x80808080
.align 32
.global strlen, __strlen
.global strlen
strlen:
__strlen:
mov %o0, %o1
andcc %o0, 3, %g0
be,pt %icc, 9f
......
......@@ -8,8 +8,7 @@
.text
.align 4
.global __strncmp, strncmp
__strncmp:
.global strncmp
strncmp:
brlez,pn %o2, 3f
lduba [%o0] (ASI_PNF), %o3
......
/* user_fixup.c: Fix up user copy faults.
*
* Copyright (C) 2004 David S. Miller <davem@redhat.com>
*/
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <asm/uaccess.h>
/* Calculating the exact fault address when using
* block loads and stores can be very complicated.
* Instead of trying to be clever and handling all
* of the cases, just fix things up simply here.
*/
unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
{
char *dst = to;
const char __user *src = from;
while (size--) {
if (__get_user(*dst, src))
break;
dst++;
src++;
}
if (size)
memset(dst, 0, size);
return size;
}
unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
{
char __user *dst = to;
const char *src = from;
while (size--) {
if (__put_user(*src, dst))
break;
dst++;
src++;
}
return size;
}
unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
{
char __user *dst = to;
char __user *src = from;
while (size--) {
char tmp;
if (__get_user(tmp, src))
break;
if (__put_user(tmp, dst))
break;
dst++;
src++;
}
return size;
}
......@@ -228,13 +228,10 @@ extern unsigned long find_next_zero_bit(unsigned long *, unsigned long, unsigned
#define find_first_zero_bit(addr, size) \
find_next_zero_bit((addr), (size), 0)
extern long ___test_and_set_le_bit(int nr, volatile unsigned long *addr);
extern long ___test_and_clear_le_bit(int nr, volatile unsigned long *addr);
#define test_and_set_le_bit(nr,addr) ({___test_and_set_le_bit(nr,addr)!=0;})
#define test_and_clear_le_bit(nr,addr) ({___test_and_clear_le_bit(nr,addr)!=0;})
#define set_le_bit(nr,addr) ((void)___test_and_set_le_bit(nr,addr))
#define clear_le_bit(nr,addr) ((void)___test_and_clear_le_bit(nr,addr))
#define test_and_set_le_bit(nr,addr) \
({ ___test_and_set_bit((nr) ^ 0x38, (addr)) != 0; })
#define test_and_clear_le_bit(nr,addr) \
({ ___test_and_clear_bit((nr) ^ 0x38, (addr)) != 0; })
static __inline__ int test_le_bit(int nr, __const__ unsigned long * addr)
{
......@@ -253,22 +250,30 @@ extern unsigned long find_next_zero_le_bit(unsigned long *, unsigned long, unsig
#ifdef __KERNEL__
#define ext2_set_bit(nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
#define ext2_set_bit_atomic(lock,nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
#define ext2_clear_bit(nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock,nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
#define ext2_test_bit(nr,addr) test_le_bit((nr),(unsigned long *)(addr))
#define ext2_set_bit(nr,addr) \
test_and_set_le_bit((nr),(unsigned long *)(addr))
#define ext2_set_bit_atomic(lock,nr,addr) \
test_and_set_le_bit((nr),(unsigned long *)(addr))
#define ext2_clear_bit(nr,addr) \
test_and_clear_le_bit((nr),(unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock,nr,addr) \
test_and_clear_le_bit((nr),(unsigned long *)(addr))
#define ext2_test_bit(nr,addr) \
test_le_bit((nr),(unsigned long *)(addr))
#define ext2_find_first_zero_bit(addr, size) \
find_first_zero_le_bit((unsigned long *)(addr), (size))
#define ext2_find_next_zero_bit(addr, size, off) \
find_next_zero_le_bit((unsigned long *)(addr), (size), (off))
/* Bitmap functions for the minix filesystem. */
#define minix_test_and_set_bit(nr,addr) test_and_set_bit((nr),(unsigned long *)(addr))
#define minix_set_bit(nr,addr) set_bit((nr),(unsigned long *)(addr))
#define minix_test_and_set_bit(nr,addr) \
test_and_set_bit((nr),(unsigned long *)(addr))
#define minix_set_bit(nr,addr) \
set_bit((nr),(unsigned long *)(addr))
#define minix_test_and_clear_bit(nr,addr) \
test_and_clear_bit((nr),(unsigned long *)(addr))
#define minix_test_bit(nr,addr) test_bit((nr),(unsigned long *)(addr))
#define minix_test_bit(nr,addr) \
test_bit((nr),(unsigned long *)(addr))
#define minix_find_first_zero_bit(addr,size) \
find_first_zero_bit((unsigned long *)(addr),(size))
......
......@@ -70,6 +70,13 @@ struct pci_iommu {
*/
u32 lowest_consistent_map;
/* In order to deal with some buggy third-party PCI bridges that
* do wrong prefetching, we never mark valid mappings as invalid.
* Instead we point them at this dummy page.
*/
unsigned long dummy_page;
unsigned long dummy_page_pa;
/* If PBM_NCLUSTERS is ever decreased to 4 or lower,
* or if largest supported page_table_sz * 8K goes above
* 2GB, you must increase the size of the type of
......@@ -93,6 +100,8 @@ struct pci_iommu {
u32 dma_addr_mask;
};
extern void pci_iommu_table_init(struct pci_iommu *, int);
/* This describes a PCI bus module's streaming buffer. */
struct pci_strbuf {
int strbuf_enabled; /* Present and using it? */
......
......@@ -15,35 +15,25 @@
#include <asm/asi.h>
extern void __memmove(void *,const void *,__kernel_size_t);
extern void *__memset(void *,int,__kernel_size_t);
extern void *__builtin_memset(void *,int,__kernel_size_t);
#ifndef EXPORT_SYMTAB_STROPS
/* First the mem*() things. */
#define __HAVE_ARCH_BCOPY
#define __HAVE_ARCH_MEMMOVE
#undef memmove
#define memmove(_to, _from, _n) \
({ \
void *_t = (_to); \
__memmove(_t, (_from), (_n)); \
_t; \
})
extern void *memmove(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMCPY
extern void * memcpy(void *,const void *,__kernel_size_t);
extern void *memcpy(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void *__builtin_memset(void *,int,__kernel_size_t);
static inline void *__constant_memset(void *s, int c, __kernel_size_t count)
{
extern __kernel_size_t __bzero(void *, __kernel_size_t);
if(!c) {
if (!c) {
__bzero(s, count);
return s;
} else
......@@ -61,19 +51,19 @@ static inline void *__constant_memset(void *s, int c, __kernel_size_t count)
#define __HAVE_ARCH_MEMSCAN
#undef memscan
#define memscan(__arg0, __char, __arg2) \
({ \
extern void *__memscan_zero(void *, size_t); \
extern void *__memscan_generic(void *, int, size_t); \
void *__retval, *__addr = (__arg0); \
size_t __size = (__arg2); \
\
if(__builtin_constant_p(__char) && !(__char)) \
__retval = __memscan_zero(__addr, __size); \
else \
__retval = __memscan_generic(__addr, (__char), __size); \
\
__retval; \
#define memscan(__arg0, __char, __arg2) \
({ \
extern void *__memscan_zero(void *, size_t); \
extern void *__memscan_generic(void *, int, size_t); \
void *__retval, *__addr = (__arg0); \
size_t __size = (__arg2); \
\
if(__builtin_constant_p(__char) && !(__char)) \
__retval = __memscan_zero(__addr, __size); \
else \
__retval = __memscan_generic(__addr, (__char), __size); \
\
__retval; \
})
#define __HAVE_ARCH_MEMCMP
......@@ -81,73 +71,10 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
/* Now the str*() stuff... */
#define __HAVE_ARCH_STRLEN
extern __kernel_size_t __strlen(const char *);
extern __kernel_size_t strlen(const char *);
#define __HAVE_ARCH_STRNCMP
extern int __strncmp(const char *, const char *, __kernel_size_t);
static inline int __constant_strncmp(const char *src, const char *dest, __kernel_size_t count)
{
register int retval;
switch(count) {
case 0: return 0;
case 1: return (src[0] - dest[0]);
case 2: retval = (src[0] - dest[0]);
if(!retval && src[0])
retval = (src[1] - dest[1]);
return retval;
case 3: retval = (src[0] - dest[0]);
if(!retval && src[0]) {
retval = (src[1] - dest[1]);
if(!retval && src[1])
retval = (src[2] - dest[2]);
}
return retval;
case 4: retval = (src[0] - dest[0]);
if(!retval && src[0]) {
retval = (src[1] - dest[1]);
if(!retval && src[1]) {
retval = (src[2] - dest[2]);
if (!retval && src[2])
retval = (src[3] - dest[3]);
}
}
return retval;
case 5: retval = (src[0] - dest[0]);
if(!retval && src[0]) {
retval = (src[1] - dest[1]);
if(!retval && src[1]) {
retval = (src[2] - dest[2]);
if (!retval && src[2]) {
retval = (src[3] - dest[3]);
if (!retval && src[3])
retval = (src[4] - dest[4]);
}
}
}
return retval;
default:
retval = (src[0] - dest[0]);
if(!retval && src[0]) {
retval = (src[1] - dest[1]);
if(!retval && src[1]) {
retval = (src[2] - dest[2]);
if(!retval && src[2])
retval = __strncmp(src+3,dest+3,count-3);
}
}
return retval;
}
}
#undef strncmp
#define strncmp(__arg0, __arg1, __arg2) \
(__builtin_constant_p(__arg2) ? \
__constant_strncmp(__arg0, __arg1, __arg2) : \
__strncmp(__arg0, __arg1, __arg2))
extern int strncmp(const char *, const char *, __kernel_size_t);
#endif /* !EXPORT_SYMTAB_STROPS */
......
......@@ -252,18 +252,50 @@ __asm__ __volatile__( \
extern int __get_user_bad(void);
extern unsigned long __copy_from_user(void *to, const void __user *from,
unsigned long size);
extern unsigned long ___copy_from_user(void *to, const void __user *from,
unsigned long size);
extern unsigned long copy_from_user_fixup(void *to, const void __user *from,
unsigned long size);
static inline unsigned long copy_from_user(void *to, const void __user *from,
unsigned long size)
{
unsigned long ret = ___copy_from_user(to, from, size);
extern unsigned long __copy_to_user(void __user *to, const void *from,
unsigned long size);
if (ret)
ret = copy_from_user_fixup(to, from, size);
return ret;
}
#define __copy_from_user copy_from_user
extern unsigned long ___copy_to_user(void __user *to, const void *from,
unsigned long size);
extern unsigned long copy_to_user_fixup(void __user *to, const void *from,
unsigned long size);
static inline unsigned long copy_to_user(void __user *to, const void *from,
unsigned long size)
{
unsigned long ret = ___copy_to_user(to, from, size);
extern unsigned long __copy_in_user(void __user *to, const void __user *from,
unsigned long size);
if (ret)
ret = copy_to_user_fixup(to, from, size);
return ret;
}
#define __copy_to_user copy_to_user
extern unsigned long ___copy_in_user(void __user *to, const void __user *from,
unsigned long size);
extern unsigned long copy_in_user_fixup(void __user *to, void __user *from,
unsigned long size);
static inline unsigned long copy_in_user(void __user *to, void __user *from,
unsigned long size)
{
unsigned long ret = ___copy_in_user(to, from, size);
#define copy_from_user __copy_from_user
#define copy_to_user __copy_to_user
#define copy_in_user __copy_in_user
if (ret)
ret = copy_in_user_fixup(to, from, size);
return ret;
}
#define __copy_in_user copy_in_user
extern unsigned long __bzero_noasi(void __user *, unsigned long);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment