Commit 87a326a3 authored by Francisco Jerez, committed by Ben Skeggs

drm/nv20: Add Z compression support.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Tested-by: Xavier Chantry <chantry.xavier@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent a5cf68b0
...@@ -70,6 +70,8 @@ struct nouveau_tile_reg { ...@@ -70,6 +70,8 @@ struct nouveau_tile_reg {
uint32_t addr; uint32_t addr;
uint32_t limit; uint32_t limit;
uint32_t pitch; uint32_t pitch;
uint32_t zcomp;
struct drm_mm_node *tag_mem;
struct nouveau_fence *fence; struct nouveau_fence *fence;
}; };
...@@ -306,6 +308,7 @@ struct nouveau_timer_engine { ...@@ -306,6 +308,7 @@ struct nouveau_timer_engine {
struct nouveau_fb_engine { struct nouveau_fb_engine {
int num_tiles; int num_tiles;
struct drm_mm tag_heap;
int (*init)(struct drm_device *dev); int (*init)(struct drm_device *dev);
void (*takedown)(struct drm_device *dev); void (*takedown)(struct drm_device *dev);
......
...@@ -45,6 +45,11 @@ ...@@ -45,6 +45,11 @@
# define NV04_PFB_REF_CMD_REFRESH (1 << 0) # define NV04_PFB_REF_CMD_REFRESH (1 << 0)
#define NV04_PFB_PRE 0x001002d4 #define NV04_PFB_PRE 0x001002d4
# define NV04_PFB_PRE_CMD_PRECHARGE (1 << 0) # define NV04_PFB_PRE_CMD_PRECHARGE (1 << 0)
/*
 * Z compression control registers in PFB: one 32-bit register per tile
 * region index i, laid out contiguously from 0x00100300.
 *
 * The NV20_* bits are used for chipsets below 0x25 and the NV25_* mode bits
 * for chipset 0x25 and up (see nv10_fb_init_tile_region()).
 */
#define NV20_PFB_ZCOMP(i) (0x00100300 + 4*(i))
# define NV20_PFB_ZCOMP_MODE_32 (4 << 24)
/* Unsigned literal: shifting a signed 1 into bit 31 is undefined in C and
 * sign-extends when the value is widened to a 64-bit type. */
# define NV20_PFB_ZCOMP_EN (1U << 31)
# define NV25_PFB_ZCOMP_MODE_16 (1 << 20)
# define NV25_PFB_ZCOMP_MODE_32 (2 << 20)
#define NV10_PFB_CLOSE_PAGE2 0x0010033c #define NV10_PFB_CLOSE_PAGE2 0x0010033c
#define NV04_PFB_SCRAMBLE(i) (0x00100400 + 4 * (i)) #define NV04_PFB_SCRAMBLE(i) (0x00100400 + 4 * (i))
#define NV40_PFB_TILE(i) (0x00100600 + (i*16)) #define NV40_PFB_TILE(i) (0x00100600 + (i*16))
...@@ -379,6 +384,7 @@ ...@@ -379,6 +384,7 @@
#define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16)) #define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16))
#define NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16)) #define NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16))
#define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16)) #define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16))
/* PGRAPH Z compression register for tile region i (4 bytes per region);
 * nv20_graph_set_tile_region() writes tile->zcomp to it. */
#define NV20_PGRAPH_ZCOMP(i) (0x00400980 + 4*(i))
#define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16)) #define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16))
#define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16)) #define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16))
#define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16)) #define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16))
......
...@@ -3,21 +3,81 @@ ...@@ -3,21 +3,81 @@
#include "nouveau_drv.h" #include "nouveau_drv.h"
#include "nouveau_drm.h" #include "nouveau_drm.h"
/*
 * Carve a block of `size` units out of the FB engine's tag heap.
 *
 * drm_mm_pre_get() is called before the lock is taken; the free-space
 * search and the block allocation then both run under tile.lock.
 * NOTE(review): presumably the pre-get exists so the atomic allocation
 * never has to allocate memory while the spinlock is held -- confirm
 * against the drm_mm documentation.
 *
 * Returns the allocated node, or NULL if the pre-get fails, no free range
 * of the requested size is found, or the block cannot be obtained.
 */
static struct drm_mm_node *
nv20_fb_alloc_tag(struct drm_device *dev, uint32_t size)
{
	struct drm_nouveau_private *priv = dev->dev_private;
	struct nouveau_fb_engine *fb = &priv->engine.fb;
	struct drm_mm_node *node;

	if (drm_mm_pre_get(&fb->tag_heap))
		return NULL;

	spin_lock(&priv->tile.lock);

	node = drm_mm_search_free(&fb->tag_heap, size, 0, 0);
	if (node != NULL)
		node = drm_mm_get_block_atomic(node, size, 0);

	spin_unlock(&priv->tile.lock);

	return node;
}
/*
 * Give a block obtained from nv20_fb_alloc_tag() back to the tag heap.
 * The release is done under tile.lock, the same lock the allocation
 * path holds. `mem` is passed straight to drm_mm_put_block().
 */
static void
nv20_fb_free_tag(struct drm_device *dev, struct drm_mm_node *mem)
{
	struct drm_nouveau_private *priv = dev->dev_private;

	spin_lock(&priv->tile.lock);
	drm_mm_put_block(mem);
	spin_unlock(&priv->tile.lock);
}
/*
 * Fill in the software state for tiling region i: base address, limit
 * (addr + size - 1, with max(1u, ...) keeping the subtraction from
 * wrapping below zero) and pitch.
 *
 * After this commit, on NV_20 cards a region flagged NOUVEAU_GEM_TILE_ZETA
 * also tries to reserve size / 256 units from the tag heap. If that
 * succeeds, tile->zcomp is built from the tag block's start offset plus
 * mode bits: chipset >= 0x25 uses the NV25_* 16/32 bpp modes, older
 * chipsets use NV20_PFB_ZCOMP_EN plus NV20_PFB_ZCOMP_MODE_32 for 32 bpp.
 * Zeta regions get the low bits of tile->addr set to 3 (even when the tag
 * allocation fails), other NV_20 regions get 1, and non-NV_20 cards set
 * bit 31 instead. Nothing is written to hardware in this function.
 */
void void
nv10_fb_init_tile_region(struct drm_device *dev, int i, uint32_t addr, nv10_fb_init_tile_region(struct drm_device *dev, int i, uint32_t addr,
uint32_t size, uint32_t pitch, uint32_t flags) uint32_t size, uint32_t pitch, uint32_t flags)
{ {
struct drm_nouveau_private *dev_priv = dev->dev_private; struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
int bpp = (flags & NOUVEAU_GEM_TILE_32BPP ? 32 : 16);
tile->addr = addr; tile->addr = addr;
tile->limit = max(1u, addr + size) - 1; tile->limit = max(1u, addr + size) - 1;
tile->pitch = pitch; tile->pitch = pitch;
if (dev_priv->card_type == NV_20) if (dev_priv->card_type == NV_20) {
tile->addr |= 1; if (flags & NOUVEAU_GEM_TILE_ZETA) {
else /*
* Allocate some of the on-die tag memory,
* used to store Z compression meta-data (most
* likely just a bitmap determining if a given
* tile is compressed or not).
*/
tile->tag_mem = nv20_fb_alloc_tag(dev, size / 256);
if (tile->tag_mem) {
/* Enable Z compression */
if (dev_priv->chipset >= 0x25)
tile->zcomp = tile->tag_mem->start |
(bpp == 16 ?
NV25_PFB_ZCOMP_MODE_16 :
NV25_PFB_ZCOMP_MODE_32);
else
tile->zcomp = tile->tag_mem->start |
NV20_PFB_ZCOMP_EN |
(bpp == 16 ? 0 :
NV20_PFB_ZCOMP_MODE_32);
}
tile->addr |= 3;
} else {
tile->addr |= 1;
}
} else {
tile->addr |= 1 << 31; tile->addr |= 1 << 31;
}
} }
void void
...@@ -26,7 +86,12 @@ nv10_fb_free_tile_region(struct drm_device *dev, int i) ...@@ -26,7 +86,12 @@ nv10_fb_free_tile_region(struct drm_device *dev, int i)
struct drm_nouveau_private *dev_priv = dev->dev_private; struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
tile->addr = tile->limit = tile->pitch = 0; if (tile->tag_mem) {
nv20_fb_free_tag(dev, tile->tag_mem);
tile->tag_mem = NULL;
}
tile->addr = tile->limit = tile->pitch = tile->zcomp = 0;
} }
void void
...@@ -38,6 +103,9 @@ nv10_fb_set_tile_region(struct drm_device *dev, int i) ...@@ -38,6 +103,9 @@ nv10_fb_set_tile_region(struct drm_device *dev, int i)
nv_wr32(dev, NV10_PFB_TLIMIT(i), tile->limit); nv_wr32(dev, NV10_PFB_TLIMIT(i), tile->limit);
nv_wr32(dev, NV10_PFB_TSIZE(i), tile->pitch); nv_wr32(dev, NV10_PFB_TSIZE(i), tile->pitch);
nv_wr32(dev, NV10_PFB_TILE(i), tile->addr); nv_wr32(dev, NV10_PFB_TILE(i), tile->addr);
if (dev_priv->card_type == NV_20)
nv_wr32(dev, NV20_PFB_ZCOMP(i), tile->zcomp);
} }
int int
...@@ -49,6 +117,11 @@ nv10_fb_init(struct drm_device *dev) ...@@ -49,6 +117,11 @@ nv10_fb_init(struct drm_device *dev)
pfb->num_tiles = NV10_PFB_TILE__SIZE; pfb->num_tiles = NV10_PFB_TILE__SIZE;
if (dev_priv->card_type == NV_20)
drm_mm_init(&pfb->tag_heap, 0,
(dev_priv->chipset >= 0x25 ?
64 * 1024 : 32 * 1024));
/* Turn all the tiling regions off. */ /* Turn all the tiling regions off. */
for (i = 0; i < pfb->num_tiles; i++) for (i = 0; i < pfb->num_tiles; i++)
pfb->set_tile_region(dev, i); pfb->set_tile_region(dev, i);
...@@ -59,4 +132,13 @@ nv10_fb_init(struct drm_device *dev) ...@@ -59,4 +132,13 @@ nv10_fb_init(struct drm_device *dev)
/*
 * FB engine teardown. After this commit it releases every tile region
 * through the engine's free_tile_region hook and then, on NV_20 cards,
 * tears down the tag heap.
 */
void void
nv10_fb_takedown(struct drm_device *dev) nv10_fb_takedown(struct drm_device *dev)
{ {
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
int i;
for (i = 0; i < pfb->num_tiles; i++)
pfb->free_tile_region(dev, i);
if (dev_priv->card_type == NV_20)
drm_mm_takedown(&pfb->tag_heap);
} }
...@@ -526,6 +526,12 @@ nv20_graph_set_tile_region(struct drm_device *dev, int i) ...@@ -526,6 +526,12 @@ nv20_graph_set_tile_region(struct drm_device *dev, int i)
nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->pitch); nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->pitch);
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->addr); nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->addr);
if (dev_priv->card_type == NV_20) {
nv_wr32(dev, NV20_PGRAPH_ZCOMP(i), tile->zcomp);
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00ea0090 + 4 * i);
nv_wr32(dev, NV10_PGRAPH_RDI_DATA, tile->zcomp);
}
} }
int int
...@@ -589,16 +595,17 @@ nv20_graph_init(struct drm_device *dev) ...@@ -589,16 +595,17 @@ nv20_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x40009C , 0x00000040); nv_wr32(dev, 0x40009C , 0x00000040);
if (dev_priv->chipset >= 0x25) { if (dev_priv->chipset >= 0x25) {
nv_wr32(dev, 0x400890, 0x00080000); nv_wr32(dev, 0x400890, 0x00a8cfff);
nv_wr32(dev, 0x400610, 0x304B1FB6); nv_wr32(dev, 0x400610, 0x304B1FB6);
nv_wr32(dev, 0x400B80, 0x18B82880); nv_wr32(dev, 0x400B80, 0x1cbd3883);
nv_wr32(dev, 0x400B84, 0x44000000); nv_wr32(dev, 0x400B84, 0x44000000);
nv_wr32(dev, 0x400098, 0x40000080); nv_wr32(dev, 0x400098, 0x40000080);
nv_wr32(dev, 0x400B88, 0x000000ff); nv_wr32(dev, 0x400B88, 0x000000ff);
} else { } else {
nv_wr32(dev, 0x400880, 0x00080000); /* 0x0008c7df */ nv_wr32(dev, 0x400880, 0x0008c7df);
nv_wr32(dev, 0x400094, 0x00000005); nv_wr32(dev, 0x400094, 0x00000005);
nv_wr32(dev, 0x400B80, 0x45CAA208); /* 0x45eae20e */ nv_wr32(dev, 0x400B80, 0x45eae20e);
nv_wr32(dev, 0x400B84, 0x24000000); nv_wr32(dev, 0x400B84, 0x24000000);
nv_wr32(dev, 0x400098, 0x00000040); nv_wr32(dev, 0x400098, 0x00000040);
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00E00038); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00E00038);
...@@ -611,12 +618,6 @@ nv20_graph_init(struct drm_device *dev) ...@@ -611,12 +618,6 @@ nv20_graph_init(struct drm_device *dev)
for (i = 0; i < NV10_PFB_TILE__SIZE; i++) for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
nv20_graph_set_tile_region(dev, i); nv20_graph_set_tile_region(dev, i);
for (i = 0; i < 8; i++) {
nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4));
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4);
nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
nv_rd32(dev, 0x100300 + i * 4));
}
nv_wr32(dev, 0x4009a0, nv_rd32(dev, 0x100324)); nv_wr32(dev, 0x4009a0, nv_rd32(dev, 0x100324));
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA000C); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA000C);
nv_wr32(dev, NV10_PGRAPH_RDI_DATA, nv_rd32(dev, 0x100324)); nv_wr32(dev, NV10_PGRAPH_RDI_DATA, nv_rd32(dev, 0x100324));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment