Commit 4607694a authored by Richard Henderson's avatar Richard Henderson

[TGAFB] Implement the fb_imageblit hook.

Speeds up rendering of text by around 7x for 8bpp cards,
as you'd expect from the difference in the volume of data
passed across the bus.  Thus the win should be about 31x
for 32bpp cards.
parent 6c56ca7b
...@@ -39,6 +39,9 @@ static int tgafb_setcolreg(unsigned, unsigned, unsigned, unsigned, ...@@ -39,6 +39,9 @@ static int tgafb_setcolreg(unsigned, unsigned, unsigned, unsigned,
unsigned, struct fb_info *); unsigned, struct fb_info *);
static int tgafb_blank(int, struct fb_info *); static int tgafb_blank(int, struct fb_info *);
static void tgafb_init_fix(struct fb_info *); static void tgafb_init_fix(struct fb_info *);
static void tgafb_imageblit(struct fb_info *, struct fb_image *);
static int tgafb_pci_register(struct pci_dev *, const struct pci_device_id *); static int tgafb_pci_register(struct pci_dev *, const struct pci_device_id *);
#ifdef MODULE #ifdef MODULE
static void tgafb_pci_unregister(struct pci_dev *); static void tgafb_pci_unregister(struct pci_dev *);
...@@ -59,7 +62,7 @@ static struct fb_ops tgafb_ops = { ...@@ -59,7 +62,7 @@ static struct fb_ops tgafb_ops = {
.fb_blank = tgafb_blank, .fb_blank = tgafb_blank,
.fb_fillrect = cfb_fillrect, .fb_fillrect = cfb_fillrect,
.fb_copyarea = cfb_copyarea, .fb_copyarea = cfb_copyarea,
.fb_imageblit = cfb_imageblit, .fb_imageblit = tgafb_imageblit,
.fb_cursor = soft_cursor, .fb_cursor = soft_cursor,
}; };
...@@ -499,6 +502,256 @@ tgafb_blank(int blank, struct fb_info *info) ...@@ -499,6 +502,256 @@ tgafb_blank(int blank, struct fb_info *info)
} }
/*
* Acceleration.
*/
static void
tgafb_imageblit(struct fb_info *info, struct fb_image *image)
{
static unsigned char const bitrev[256] = {
0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
};
struct tga_par *par = (struct tga_par *) info->par;
u32 fgcolor, bgcolor, dx, dy, width, height, vxres, vyres, pixelmask;
unsigned long rincr, line_length, shift, pos, is8bpp;
unsigned long i, j, k;
const unsigned char *data;
void *regs_base, *fb_base;
dx = image->dx;
dy = image->dy;
width = image->width;
height = image->height;
vxres = info->var.xres_virtual;
vyres = info->var.yres_virtual;
line_length = info->fix.line_length;
rincr = (width + 7) / 8;
/* Crop the image to the screen. */
if (dx > vxres || dy > vyres)
return;
if (dx + width > vxres)
width = vxres - dx;
if (dy + height > vyres)
height = vyres - dy;
/* For copies that aren't pixel expansion, there's little we
can do better than the generic code. */
/* ??? There is a DMA write mode; I wonder if that could be
made to pull the data from the image buffer... */
if (image->depth > 1) {
cfb_imageblit(info, image);
return;
}
regs_base = par->tga_regs_base;
fb_base = par->tga_fb_base;
is8bpp = par->tga_type == TGA_TYPE_8PLANE;
/* Expand the color values to fill 32-bits. */
/* ??? Would be nice to notice colour changes elsewhere, so
that we can do this only when necessary. */
fgcolor = image->fg_color;
bgcolor = image->bg_color;
if (is8bpp) {
fgcolor |= fgcolor << 8;
fgcolor |= fgcolor << 16;
bgcolor |= bgcolor << 8;
bgcolor |= bgcolor << 16;
} else {
fgcolor = ((u32 *)info->pseudo_palette)[fgcolor];
bgcolor = ((u32 *)info->pseudo_palette)[bgcolor];
}
__raw_writel(fgcolor, regs_base + TGA_FOREGROUND_REG);
__raw_writel(bgcolor, regs_base + TGA_BACKGROUND_REG);
/* Acquire proper alignment; set up the PIXELMASK register
so that we only write the proper character cell. */
pos = dy * line_length + dx;
if (is8bpp) {
shift = pos & 3;
pos &= -4;
} else {
shift = (pos & 7) >> 2;
pos &= -8;
}
data = (const unsigned char *) image->data;
/* Enable opaque stipple mode. */
__raw_writel((is8bpp
? TGA_MODE_SBM_8BPP | TGA_MODE_OPAQUE_STIPPLE
: TGA_MODE_SBM_24BPP | TGA_MODE_OPAQUE_STIPPLE),
regs_base + TGA_MODE_REG);
if (width + shift <= 32) {
unsigned long bwidth;
/* Handle common case of imaging a single character, in
a font less than 32 pixels wide. */
pixelmask = (1 << width) - 1;
pixelmask <<= shift;
__raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
wmb();
bwidth = (width + 7) / 8;
for (i = 0; i < height; ++i) {
u32 mask = 0;
/* The image data is bit big endian; we need
little endian. */
for (j = 0; j < bwidth; ++j)
mask |= bitrev[data[j]] << (j * 8);
__raw_writel(mask << shift, fb_base + pos);
pos += line_length;
data += rincr;
}
wmb();
__raw_writel(0xffffffff, regs_base + TGA_PIXELMASK_REG);
} else if (shift == 0) {
unsigned long pos0 = pos;
const unsigned char *data0 = data;
unsigned long bincr = (is8bpp ? 8 : 8*4);
unsigned long bwidth;
/* Handle another common case in which accel_putcs
generates a large bitmap, which happens to be aligned.
Allow the tail to be misaligned. This case is
interesting because we've not got to hold partial
bytes across the words being written. */
wmb();
bwidth = (width / 8) & -4;
for (i = 0; i < height; ++i) {
for (j = 0; j < bwidth; j += 4) {
u32 mask = 0;
for (k = 0; k < 4; ++k)
mask |= bitrev[data[j+k]] << (k * 8);
__raw_writel(mask, fb_base + pos + j*bincr);
}
pos += line_length;
data += rincr;
}
wmb();
pixelmask = (1ul << (width & 31)) - 1;
if (pixelmask) {
__raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
wmb();
pos = pos0 + bwidth*bincr;
data = data0 + bwidth;
bwidth = ((width & 31) + 7) / 8;
for (i = 0; i < height; ++i) {
u32 mask = 0;
for (k = 0; k < bwidth; ++k)
mask |= bitrev[data[k]] << (k * 8);
__raw_writel(mask, fb_base + pos);
pos += line_length;
data += rincr;
}
wmb();
__raw_writel(0xffffffff, regs_base + TGA_PIXELMASK_REG);
}
} else {
unsigned long pos0 = pos;
const unsigned char *data0 = data;
unsigned long bincr = (is8bpp ? 8 : 8*4);
unsigned long bwidth;
/* Finally, handle the generic case of misaligned start.
Here we split the write into 16-bit spans. This allows
us to use only one pixel mask, instead of four as would
be required by writing 24-bit spans. */
pixelmask = 0xffff << shift;
__raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
wmb();
bwidth = (width / 8) & -2;
for (i = 0; i < height; ++i) {
for (j = 0; j < bwidth; j += 2) {
u32 mask;
mask = bitrev[data[j]];
mask |= bitrev[data[j+1]] << 8;
mask <<= shift;
__raw_writel(mask, fb_base + pos + j*bincr);
}
pos += line_length;
data += rincr;
}
wmb();
pixelmask = ((1ul << (width & 15)) - 1) << shift;
if (pixelmask) {
__raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
wmb();
pos = pos0 + bwidth*bincr;
data = data0 + bwidth;
bwidth = (width & 15) > 8;
for (i = 0; i < height; ++i) {
u32 mask = bitrev[data[0]];
if (bwidth)
mask |= bitrev[data[1]] << 8;
mask <<= shift;
__raw_writel(mask, fb_base + pos);
pos += line_length;
data += rincr;
}
wmb();
}
__raw_writel(0xffffffff, regs_base + TGA_PIXELMASK_REG);
}
/* Disable opaque stipple mode. */
__raw_writel((is8bpp
? TGA_MODE_SBM_8BPP | TGA_MODE_SIMPLE
: TGA_MODE_SBM_24BPP | TGA_MODE_SIMPLE),
regs_base + TGA_MODE_REG);
}
/* /*
* Initialisation * Initialisation
*/ */
......
...@@ -31,7 +31,10 @@ ...@@ -31,7 +31,10 @@
#define TGA_24PLANE_FB_OFFSET 0x0804000 #define TGA_24PLANE_FB_OFFSET 0x0804000
#define TGA_24PLUSZ_FB_OFFSET 0x1004000 #define TGA_24PLUSZ_FB_OFFSET 0x1004000
#define TGA_FOREGROUND_REG 0x0020
#define TGA_BACKGROUND_REG 0x0024
#define TGA_PLANEMASK_REG 0x0028 #define TGA_PLANEMASK_REG 0x0028
#define TGA_PIXELMASK_ONESHOT_REG 0x002c
#define TGA_MODE_REG 0x0030 #define TGA_MODE_REG 0x0030
#define TGA_RASTEROP_REG 0x0034 #define TGA_RASTEROP_REG 0x0034
#define TGA_PIXELSHIFT_REG 0x0038 #define TGA_PIXELSHIFT_REG 0x0038
...@@ -47,6 +50,14 @@ ...@@ -47,6 +50,14 @@
#define TGA_RAMDAC_SETUP_REG 0x00c0 #define TGA_RAMDAC_SETUP_REG 0x00c0
#define TGA_BLOCK_COLOR0_REG 0x0140 #define TGA_BLOCK_COLOR0_REG 0x0140
#define TGA_BLOCK_COLOR1_REG 0x0144 #define TGA_BLOCK_COLOR1_REG 0x0144
#define TGA_BLOCK_COLOR2_REG 0x0148
#define TGA_BLOCK_COLOR3_REG 0x014c
#define TGA_BLOCK_COLOR4_REG 0x0150
#define TGA_BLOCK_COLOR5_REG 0x0154
#define TGA_BLOCK_COLOR6_REG 0x0158
#define TGA_BLOCK_COLOR7_REG 0x015c
#define TGA_COPY64_SRC 0x0160
#define TGA_COPY64_DST 0x0164
#define TGA_CLOCK_REG 0x01e8 #define TGA_CLOCK_REG 0x01e8
#define TGA_RAMDAC_REG 0x01f0 #define TGA_RAMDAC_REG 0x01f0
#define TGA_CMD_STAT_REG 0x01f8 #define TGA_CMD_STAT_REG 0x01f8
...@@ -76,6 +87,22 @@ ...@@ -76,6 +87,22 @@
#define TGA_VALID_BLANK 0x02 #define TGA_VALID_BLANK 0x02
#define TGA_VALID_CURSOR 0x04 #define TGA_VALID_CURSOR 0x04
#define TGA_MODE_SBM_8BPP 0x000
#define TGA_MODE_SBM_24BPP 0x300
#define TGA_MODE_SIMPLE 0x00
#define TGA_MODE_SIMPLEZ 0x10
#define TGA_MODE_OPAQUE_STIPPLE 0x01
#define TGA_MODE_OPAQUE_FILL 0x21
#define TGA_MODE_TRANSPARENT_STIPPLE 0x03
#define TGA_MODE_TRANSPARENT_FILL 0x23
#define TGA_MODE_BLOCK_STIPPLE 0x0d
#define TGA_MODE_BLOCK_FILL 0x2d
#define TGA_MODE_COPY 0x07
#define TGA_MODE_DMA_READ_COPY_ND 0x17
#define TGA_MODE_DMA_READ_COPY_D 0x37
#define TGA_MODE_DMA_WRITE_COPY 0x1f
/* /*
* Useful defines for managing the ICS1562 PLL clock * Useful defines for managing the ICS1562 PLL clock
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment