Commit 53887e3d, authored by Dave Jones, committed by Linus Torvalds

[PATCH] prefetching too far in mem copies

This patch from 2.4 makes sure we don't prefetch past the
end of a range to be copied (in case it's at the end of a memrange).
The i386 case looks safe already; we just weren't optimal for the last
chunk to be copied.

Andi, the same change is needed for x86-64.

    Dave.
parent 3fe2c413
...@@ -89,6 +89,30 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) ...@@ -89,6 +89,30 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
from+=64; from+=64;
to+=64; to+=64;
} }
for(; i>0; i--)
{
__asm__ __volatile__ (
" movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n"
" movq 24(%0), %%mm3\n"
" movq %%mm0, (%1)\n"
" movq %%mm1, 8(%1)\n"
" movq %%mm2, 16(%1)\n"
" movq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm0\n"
" movq 40(%0), %%mm1\n"
" movq 48(%0), %%mm2\n"
" movq 56(%0), %%mm3\n"
" movq %%mm0, 32(%1)\n"
" movq %%mm1, 40(%1)\n"
" movq %%mm2, 48(%1)\n"
" movq %%mm3, 56(%1)\n"
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
}
/* /*
* Now do the tail of the block * Now do the tail of the block
*/ */
......
...@@ -62,11 +62,34 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) ...@@ -62,11 +62,34 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"\n" "\n"
: : "r" (from) ); : : "r" (from) );
for(; i>5; i--)
{
__asm__ __volatile__ (
" prefetch 320(%0)\n"
" movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n"
" movq 24(%0), %%mm3\n"
" movq %%mm0, (%1)\n"
" movq %%mm1, 8(%1)\n"
" movq %%mm2, 16(%1)\n"
" movq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm0\n"
" movq 40(%0), %%mm1\n"
" movq 48(%0), %%mm2\n"
" movq 56(%0), %%mm3\n"
" movq %%mm0, 32(%1)\n"
" movq %%mm1, 40(%1)\n"
" movq %%mm2, 48(%1)\n"
" movq %%mm3, 56(%1)\n"
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
}
for(; i>0; i--) for(; i>0; i--)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
" prefetch 320(%0)\n"
" movq (%0), %%mm0\n" " movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n" " movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n" " movq 16(%0), %%mm2\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment