Commit 78a43c7e authored by Ben Skeggs

drm/nouveau/gr/gf100-: make global attrib_cb actually global

This was thought to be per-channel initially - it's not.  The backing
pages for the VMM mappings are shared for all channels.

- switches to more straight-forward patch interfaces
- prepares for sub-context support
- this is saving a *sizeable* amount of vram

v2:
- whitespace
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
parent 5eee9fdd
...@@ -1003,45 +1003,6 @@ gf100_grctx_patch_wr32(struct gf100_gr_chan *chan, u32 addr, u32 data) ...@@ -1003,45 +1003,6 @@ gf100_grctx_patch_wr32(struct gf100_gr_chan *chan, u32 addr, u32 data)
nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
} }
int
gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, bool priv)
{
if (info->data) {
info->buffer[info->buffer_nr] = round_up(info->addr, align);
info->addr = info->buffer[info->buffer_nr] + size;
info->data->size = size;
info->data->align = align;
info->data->priv = priv;
info->data++;
return info->buffer_nr++;
}
return -1;
}
void
gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data,
int shift, int buffer)
{
struct nvkm_device *device = info->gr->base.engine.subdev.device;
if (info->data) {
if (shift >= 0) {
info->mmio->addr = addr;
info->mmio->data = data;
info->mmio->shift = shift;
info->mmio->buffer = buffer;
if (buffer >= 0)
data |= info->buffer[buffer] >> shift;
info->mmio++;
} else
return;
} else {
if (buffer >= 0)
return;
}
nvkm_wr32(device, addr, data);
}
void void
gf100_grctx_generate_r419cb8(struct gf100_gr *gr) gf100_grctx_generate_r419cb8(struct gf100_gr *gr)
{ {
...@@ -1068,31 +1029,41 @@ gf100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) ...@@ -1068,31 +1029,41 @@ gf100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
} }
void void
gf100_grctx_generate_attrib(struct gf100_grctx *info) gf100_grctx_generate_attrib(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = info->gr; struct gf100_gr *gr = chan->gr;
const struct gf100_grctx_func *grctx = gr->func->grctx; const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 attrib = grctx->attrib_nr; const u32 attrib = grctx->attrib_nr;
const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
const int s = 12;
const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
int gpc, tpc; int gpc, tpc;
u32 bo = 0; u32 bo = 0;
mmio_refn(info, 0x418810, 0x80000000, s, b); gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16));
mmio_refn(info, 0x419848, 0x10000000, s, b);
mmio_wr32(info, 0x405830, (attrib << 16));
for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
const u32 o = TPC_UNIT(gpc, tpc, 0x0520); const u32 o = TPC_UNIT(gpc, tpc, 0x0520);
mmio_skip(info, o, (attrib << 16) | ++bo);
mmio_wr32(info, o, (attrib << 16) | --bo); gf100_grctx_patch_wr32(chan, o, (attrib << 16) | bo);
bo += grctx->attrib_nr_max; bo += grctx->attrib_nr_max;
} }
} }
} }
void
gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
{
gf100_grctx_patch_wr32(chan, 0x418810, 0x80000000 | addr >> 12);
gf100_grctx_patch_wr32(chan, 0x419848, 0x10000000 | addr >> 12);
}
u32
gf100_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
{
const struct gf100_grctx_func *grctx = gr->func->grctx;
return 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max) * gr->tpc_total;
}
void void
gf100_grctx_generate_unkn(struct gf100_gr *gr) gf100_grctx_generate_unkn(struct gf100_gr *gr)
{ {
...@@ -1368,7 +1339,7 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr) ...@@ -1368,7 +1339,7 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
} }
void void
gf100_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) gf100_grctx_generate_main(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = chan->gr; struct gf100_gr *gr = chan->gr;
struct nvkm_device *device = gr->base.engine.subdev.device; struct nvkm_device *device = gr->base.engine.subdev.device;
...@@ -1394,7 +1365,8 @@ gf100_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) ...@@ -1394,7 +1365,8 @@ gf100_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
grctx->pagepool(chan, chan->pagepool->addr); grctx->pagepool(chan, chan->pagepool->addr);
grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size);
grctx->attrib(info); grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
grctx->attrib(chan);
if (grctx->patch_ltc) if (grctx->patch_ltc)
grctx->patch_ltc(chan); grctx->patch_ltc(chan);
if (grctx->unknown_size) if (grctx->unknown_size)
...@@ -1450,7 +1422,6 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk ...@@ -1450,7 +1422,6 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk
struct nvkm_device *device = subdev->device; struct nvkm_device *device = subdev->device;
struct nvkm_memory *data = NULL; struct nvkm_memory *data = NULL;
struct nvkm_vma *ctx = NULL; struct nvkm_vma *ctx = NULL;
struct gf100_grctx info;
int ret, i; int ret, i;
u64 addr; u64 addr;
...@@ -1500,13 +1471,6 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk ...@@ -1500,13 +1471,6 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk
nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED)); nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED));
nvkm_done(inst); nvkm_done(inst);
/* Setup default state for mmio list construction. */
info.gr = gr;
info.data = gr->mmio_data;
info.mmio = gr->mmio_list;
info.addr = ctx->addr;
info.buffer_nr = 0;
/* Make channel current. */ /* Make channel current. */
addr = inst->addr >> 12; addr = inst->addr >> 12;
if (gr->firmware) { if (gr->firmware) {
...@@ -1530,7 +1494,7 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk ...@@ -1530,7 +1494,7 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk
); );
} }
grctx->main(chan, &info); grctx->main(chan);
/* Trigger a context unload by unsetting the "next channel valid" bit /* Trigger a context unload by unsetting the "next channel valid" bit
* and faking a context switch interrupt. * and faking a context switch interrupt.
...@@ -1582,6 +1546,8 @@ gf100_grctx = { ...@@ -1582,6 +1546,8 @@ gf100_grctx = {
.bundle_size = 0x1800, .bundle_size = 0x1800,
.pagepool = gf100_grctx_generate_pagepool, .pagepool = gf100_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf100_grctx_generate_attrib, .attrib = gf100_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -3,28 +3,12 @@ ...@@ -3,28 +3,12 @@
#define __NVKM_GRCTX_NVC0_H__ #define __NVKM_GRCTX_NVC0_H__
#include "gf100.h" #include "gf100.h"
struct gf100_grctx {
struct gf100_gr *gr;
struct gf100_gr_data *data;
struct gf100_gr_mmio *mmio;
int buffer_nr;
u64 buffer[4];
u64 addr;
};
int gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, bool priv);
void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int);
void gf100_grctx_patch_wr32(struct gf100_gr_chan *, u32 addr, u32 data); void gf100_grctx_patch_wr32(struct gf100_gr_chan *, u32 addr, u32 data);
#define mmio_vram(a,b,c,d) gf100_grctx_mmio_data((a), (b), (c), (d))
#define mmio_refn(a,b,c,d,e) gf100_grctx_mmio_item((a), (b), (c), (d), (e))
#define mmio_skip(a,b,c) mmio_refn((a), (b), (c), -1, -1)
#define mmio_wr32(a,b,c) mmio_refn((a), (b), (c), 0, -1)
struct gf100_grctx_func { struct gf100_grctx_func {
void (*unkn88c)(struct gf100_gr *, bool on); void (*unkn88c)(struct gf100_gr *, bool on);
/* main context generation function */ /* main context generation function */
void (*main)(struct gf100_gr_chan *, struct gf100_grctx *); void (*main)(struct gf100_gr_chan *);
/* context-specific modify-on-first-load list generation function */ /* context-specific modify-on-first-load list generation function */
void (*unkn)(struct gf100_gr *); void (*unkn)(struct gf100_gr *);
/* mmio context data */ /* mmio context data */
...@@ -47,7 +31,9 @@ struct gf100_grctx_func { ...@@ -47,7 +31,9 @@ struct gf100_grctx_func {
void (*pagepool)(struct gf100_gr_chan *, u64 addr); void (*pagepool)(struct gf100_gr_chan *, u64 addr);
u32 pagepool_size; u32 pagepool_size;
/* attribute(/alpha) circular buffer */ /* attribute(/alpha) circular buffer */
void (*attrib)(struct gf100_grctx *); u32 (*attrib_cb_size)(struct gf100_gr *);
void (*attrib_cb)(struct gf100_gr_chan *, u64 addr, u32 size);
void (*attrib)(struct gf100_gr_chan *);
u32 attrib_nr_max; u32 attrib_nr_max;
u32 attrib_nr; u32 attrib_nr;
u32 alpha_nr_max; u32 alpha_nr_max;
...@@ -86,10 +72,12 @@ struct gf100_grctx_func { ...@@ -86,10 +72,12 @@ struct gf100_grctx_func {
extern const struct gf100_grctx_func gf100_grctx; extern const struct gf100_grctx_func gf100_grctx;
int gf100_grctx_generate(struct gf100_gr *, struct gf100_gr_chan *, struct nvkm_gpuobj *inst); int gf100_grctx_generate(struct gf100_gr *, struct gf100_gr_chan *, struct nvkm_gpuobj *inst);
void gf100_grctx_generate_main(struct gf100_gr_chan *, struct gf100_grctx *); void gf100_grctx_generate_main(struct gf100_gr_chan *);
void gf100_grctx_generate_pagepool(struct gf100_gr_chan *, u64); void gf100_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
void gf100_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); void gf100_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32);
void gf100_grctx_generate_attrib(struct gf100_grctx *); u32 gf100_grctx_generate_attrib_cb_size(struct gf100_gr *);
void gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
void gf100_grctx_generate_attrib(struct gf100_gr_chan *);
void gf100_grctx_generate_unkn(struct gf100_gr *); void gf100_grctx_generate_unkn(struct gf100_gr *);
void gf100_grctx_generate_floorsweep(struct gf100_gr *); void gf100_grctx_generate_floorsweep(struct gf100_gr *);
void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int); void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
...@@ -101,14 +89,14 @@ void gf100_grctx_generate_max_ways_evict(struct gf100_gr *); ...@@ -101,14 +89,14 @@ void gf100_grctx_generate_max_ways_evict(struct gf100_gr *);
void gf100_grctx_generate_r419cb8(struct gf100_gr *); void gf100_grctx_generate_r419cb8(struct gf100_gr *);
extern const struct gf100_grctx_func gf108_grctx; extern const struct gf100_grctx_func gf108_grctx;
void gf108_grctx_generate_attrib(struct gf100_grctx *); void gf108_grctx_generate_attrib(struct gf100_gr_chan *);
void gf108_grctx_generate_unkn(struct gf100_gr *); void gf108_grctx_generate_unkn(struct gf100_gr *);
extern const struct gf100_grctx_func gf104_grctx; extern const struct gf100_grctx_func gf104_grctx;
extern const struct gf100_grctx_func gf110_grctx; extern const struct gf100_grctx_func gf110_grctx;
extern const struct gf100_grctx_func gf117_grctx; extern const struct gf100_grctx_func gf117_grctx;
void gf117_grctx_generate_attrib(struct gf100_grctx *); void gf117_grctx_generate_attrib(struct gf100_gr_chan *);
void gf117_grctx_generate_rop_mapping(struct gf100_gr *); void gf117_grctx_generate_rop_mapping(struct gf100_gr *);
void gf117_grctx_generate_dist_skip_table(struct gf100_gr *); void gf117_grctx_generate_dist_skip_table(struct gf100_gr *);
...@@ -134,7 +122,8 @@ extern const struct gf100_grctx_func gk208_grctx; ...@@ -134,7 +122,8 @@ extern const struct gf100_grctx_func gk208_grctx;
extern const struct gf100_grctx_func gm107_grctx; extern const struct gf100_grctx_func gm107_grctx;
void gm107_grctx_generate_pagepool(struct gf100_gr_chan *, u64); void gm107_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
void gm107_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); void gm107_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32);
void gm107_grctx_generate_attrib(struct gf100_grctx *); void gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
void gm107_grctx_generate_attrib(struct gf100_gr_chan *);
void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int); void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
extern const struct gf100_grctx_func gm200_grctx; extern const struct gf100_grctx_func gm200_grctx;
...@@ -148,10 +137,12 @@ extern const struct gf100_grctx_func gm20b_grctx; ...@@ -148,10 +137,12 @@ extern const struct gf100_grctx_func gm20b_grctx;
extern const struct gf100_grctx_func gp100_grctx; extern const struct gf100_grctx_func gp100_grctx;
void gp100_grctx_generate_pagepool(struct gf100_gr_chan *, u64); void gp100_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
void gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
void gp100_grctx_generate_smid_config(struct gf100_gr *); void gp100_grctx_generate_smid_config(struct gf100_gr *);
extern const struct gf100_grctx_func gp102_grctx; extern const struct gf100_grctx_func gp102_grctx;
void gp102_grctx_generate_attrib(struct gf100_grctx *); u32 gp102_grctx_generate_attrib_cb_size(struct gf100_gr *);
void gp102_grctx_generate_attrib(struct gf100_gr_chan *);
extern const struct gf100_grctx_func gp104_grctx; extern const struct gf100_grctx_func gp104_grctx;
...@@ -163,7 +154,8 @@ extern const struct gf100_grctx_func tu102_grctx; ...@@ -163,7 +154,8 @@ extern const struct gf100_grctx_func tu102_grctx;
void gv100_grctx_unkn88c(struct gf100_gr *, bool); void gv100_grctx_unkn88c(struct gf100_gr *, bool);
void gv100_grctx_generate_unkn(struct gf100_gr *); void gv100_grctx_generate_unkn(struct gf100_gr *);
extern const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[]; extern const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[];
void gv100_grctx_generate_attrib(struct gf100_grctx *); void gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
void gv100_grctx_generate_attrib(struct gf100_gr_chan *);
void gv100_grctx_generate_rop_mapping(struct gf100_gr *); void gv100_grctx_generate_rop_mapping(struct gf100_gr *);
void gv100_grctx_generate_r400088(struct gf100_gr *, bool); void gv100_grctx_generate_r400088(struct gf100_gr *, bool);
......
...@@ -94,6 +94,8 @@ gf104_grctx = { ...@@ -94,6 +94,8 @@ gf104_grctx = {
.bundle_size = 0x1800, .bundle_size = 0x1800,
.pagepool = gf100_grctx_generate_pagepool, .pagepool = gf100_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf100_grctx_generate_attrib, .attrib = gf100_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -733,25 +733,20 @@ gf108_grctx_pack_tpc[] = { ...@@ -733,25 +733,20 @@ gf108_grctx_pack_tpc[] = {
******************************************************************************/ ******************************************************************************/
void void
gf108_grctx_generate_attrib(struct gf100_grctx *info) gf108_grctx_generate_attrib(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = info->gr; struct gf100_gr *gr = chan->gr;
const struct gf100_grctx_func *grctx = gr->func->grctx; const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 alpha = grctx->alpha_nr; const u32 alpha = grctx->alpha_nr;
const u32 beta = grctx->attrib_nr; const u32 beta = grctx->attrib_nr;
const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
const int s = 12;
const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
const int timeslice_mode = 1; const int timeslice_mode = 1;
const int max_batches = 0xffff; const int max_batches = 0xffff;
u32 bo = 0; u32 bo = 0;
u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
int gpc, tpc; int gpc, tpc;
mmio_refn(info, 0x418810, 0x80000000, s, b); gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha);
mmio_refn(info, 0x419848, 0x10000000, s, b); gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
mmio_wr32(info, 0x405830, (beta << 16) | alpha);
mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
...@@ -759,10 +754,10 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info) ...@@ -759,10 +754,10 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info)
const u32 b = beta; const u32 b = beta;
const u32 t = timeslice_mode; const u32 t = timeslice_mode;
const u32 o = TPC_UNIT(gpc, tpc, 0x500); const u32 o = TPC_UNIT(gpc, tpc, 0x500);
mmio_skip(info, o + 0x20, (t << 28) | (b << 16) | ++bo);
mmio_wr32(info, o + 0x20, (t << 28) | (b << 16) | --bo); gf100_grctx_patch_wr32(chan, o + 0x20, (t << 28) | (b << 16) | bo);
bo += grctx->attrib_nr_max; bo += grctx->attrib_nr_max;
mmio_wr32(info, o + 0x44, (a << 16) | ao); gf100_grctx_patch_wr32(chan, o + 0x44, (a << 16) | ao);
ao += grctx->alpha_nr_max; ao += grctx->alpha_nr_max;
} }
} }
...@@ -795,6 +790,8 @@ gf108_grctx = { ...@@ -795,6 +790,8 @@ gf108_grctx = {
.bundle_size = 0x1800, .bundle_size = 0x1800,
.pagepool = gf100_grctx_generate_pagepool, .pagepool = gf100_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf108_grctx_generate_attrib, .attrib = gf108_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -342,6 +342,8 @@ gf110_grctx = { ...@@ -342,6 +342,8 @@ gf110_grctx = {
.bundle_size = 0x1800, .bundle_size = 0x1800,
.pagepool = gf100_grctx_generate_pagepool, .pagepool = gf100_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf100_grctx_generate_attrib, .attrib = gf100_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -241,25 +241,20 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) ...@@ -241,25 +241,20 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
} }
void void
gf117_grctx_generate_attrib(struct gf100_grctx *info) gf117_grctx_generate_attrib(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = info->gr; struct gf100_gr *gr = chan->gr;
const struct gf100_grctx_func *grctx = gr->func->grctx; const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 alpha = grctx->alpha_nr; const u32 alpha = grctx->alpha_nr;
const u32 beta = grctx->attrib_nr; const u32 beta = grctx->attrib_nr;
const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
const int s = 12;
const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
const int timeslice_mode = 1; const int timeslice_mode = 1;
const int max_batches = 0xffff; const int max_batches = 0xffff;
u32 bo = 0; u32 bo = 0;
u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
int gpc, ppc; int gpc, ppc;
mmio_refn(info, 0x418810, 0x80000000, s, b); gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha);
mmio_refn(info, 0x419848, 0x10000000, s, b); gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
mmio_wr32(info, 0x405830, (beta << 16) | alpha);
mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
...@@ -267,12 +262,13 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info) ...@@ -267,12 +262,13 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info)
const u32 b = beta * gr->ppc_tpc_nr[gpc][ppc]; const u32 b = beta * gr->ppc_tpc_nr[gpc][ppc];
const u32 t = timeslice_mode; const u32 t = timeslice_mode;
const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 o = PPC_UNIT(gpc, ppc, 0);
if (!(gr->ppc_mask[gpc] & (1 << ppc))) if (!(gr->ppc_mask[gpc] & (1 << ppc)))
continue; continue;
mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo);
mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo); gf100_grctx_patch_wr32(chan, o + 0xc0, (t << 28) | (b << 16) | bo);
bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
mmio_wr32(info, o + 0xe4, (a << 16) | ao); gf100_grctx_patch_wr32(chan, o + 0xe4, (a << 16) | ao);
ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
} }
} }
...@@ -294,6 +290,8 @@ gf117_grctx = { ...@@ -294,6 +290,8 @@ gf117_grctx = {
.bundle_size = 0x1800, .bundle_size = 0x1800,
.pagepool = gf100_grctx_generate_pagepool, .pagepool = gf100_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf117_grctx_generate_attrib, .attrib = gf117_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -510,6 +510,8 @@ gf119_grctx = { ...@@ -510,6 +510,8 @@ gf119_grctx = {
.bundle_size = 0x1800, .bundle_size = 0x1800,
.pagepool = gf100_grctx_generate_pagepool, .pagepool = gf100_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf108_grctx_generate_attrib, .attrib = gf108_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -981,6 +981,8 @@ gk104_grctx = { ...@@ -981,6 +981,8 @@ gk104_grctx = {
.bundle_token_limit = 0x600, .bundle_token_limit = 0x600,
.pagepool = gk104_grctx_generate_pagepool, .pagepool = gk104_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf117_grctx_generate_attrib, .attrib = gf117_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -838,6 +838,8 @@ gk110_grctx = { ...@@ -838,6 +838,8 @@ gk110_grctx = {
.bundle_token_limit = 0x7c0, .bundle_token_limit = 0x7c0,
.pagepool = gk104_grctx_generate_pagepool, .pagepool = gk104_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf117_grctx_generate_attrib, .attrib = gf117_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -87,6 +87,8 @@ gk110b_grctx = { ...@@ -87,6 +87,8 @@ gk110b_grctx = {
.bundle_token_limit = 0x600, .bundle_token_limit = 0x600,
.pagepool = gk104_grctx_generate_pagepool, .pagepool = gk104_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf117_grctx_generate_attrib, .attrib = gf117_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -553,6 +553,8 @@ gk208_grctx = { ...@@ -553,6 +553,8 @@ gk208_grctx = {
.bundle_token_limit = 0x200, .bundle_token_limit = 0x200,
.pagepool = gk104_grctx_generate_pagepool, .pagepool = gk104_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf117_grctx_generate_attrib, .attrib = gf117_grctx_generate_attrib,
.attrib_nr_max = 0x324, .attrib_nr_max = 0x324,
.attrib_nr = 0x218, .attrib_nr = 0x218,
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#include <subdev/mc.h> #include <subdev/mc.h>
static void static void
gk20a_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) gk20a_grctx_generate_main(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = chan->gr; struct gf100_gr *gr = chan->gr;
struct nvkm_device *device = gr->base.engine.subdev.device; struct nvkm_device *device = gr->base.engine.subdev.device;
...@@ -39,7 +39,8 @@ gk20a_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) ...@@ -39,7 +39,8 @@ gk20a_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
grctx->attrib(info); grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
grctx->attrib(chan);
grctx->unkn(gr); grctx->unkn(gr);
...@@ -75,6 +76,8 @@ gk20a_grctx = { ...@@ -75,6 +76,8 @@ gk20a_grctx = {
.bundle_token_limit = 0x100, .bundle_token_limit = 0x100,
.pagepool = gk104_grctx_generate_pagepool, .pagepool = gk104_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gf100_grctx_generate_attrib_cb,
.attrib = gf117_grctx_generate_attrib, .attrib = gf117_grctx_generate_attrib,
.attrib_nr_max = 0x240, .attrib_nr_max = 0x240,
.attrib_nr = 0x240, .attrib_nr = 0x240,
......
...@@ -897,25 +897,19 @@ gm107_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) ...@@ -897,25 +897,19 @@ gm107_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
} }
void void
gm107_grctx_generate_attrib(struct gf100_grctx *info) gm107_grctx_generate_attrib(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = info->gr; struct gf100_gr *gr = chan->gr;
const struct gf100_grctx_func *grctx = gr->func->grctx; const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 alpha = grctx->alpha_nr; const u32 alpha = grctx->alpha_nr;
const u32 attrib = grctx->attrib_nr; const u32 attrib = grctx->attrib_nr;
const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
const int s = 12;
const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
const int max_batches = 0xffff; const int max_batches = 0xffff;
u32 bo = 0; u32 bo = 0;
u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
int gpc, ppc, n = 0; int gpc, ppc, n = 0;
mmio_refn(info, 0x418810, 0x80000000, s, b); gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16) | alpha);
mmio_refn(info, 0x419848, 0x10000000, s, b); gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
mmio_refn(info, 0x419c2c, 0x10000000, s, b);
mmio_wr32(info, 0x405830, (attrib << 16) | alpha);
mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
...@@ -923,19 +917,29 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info) ...@@ -923,19 +917,29 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info)
const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
const u32 u = 0x418ea0 + (n * 0x04); const u32 u = 0x418ea0 + (n * 0x04);
const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 o = PPC_UNIT(gpc, ppc, 0);
if (!(gr->ppc_mask[gpc] & (1 << ppc))) if (!(gr->ppc_mask[gpc] & (1 << ppc)))
continue; continue;
mmio_wr32(info, o + 0xc0, bs);
mmio_wr32(info, o + 0xf4, bo); gf100_grctx_patch_wr32(chan, o + 0xc0, bs);
gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
mmio_wr32(info, o + 0xe4, as); gf100_grctx_patch_wr32(chan, o + 0xe4, as);
mmio_wr32(info, o + 0xf8, ao); gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
mmio_wr32(info, u, ((bs / 3) << 16) | bs); gf100_grctx_patch_wr32(chan, u, ((bs / 3) << 16) | bs);
} }
} }
} }
void
gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
{
gf100_grctx_generate_attrib_cb(chan, addr, size);
gf100_grctx_patch_wr32(chan, 0x419c2c, 0x10000000 | addr >> 12);
}
static void static void
gm107_grctx_generate_r406500(struct gf100_gr *gr) gm107_grctx_generate_r406500(struct gf100_gr *gr)
{ {
...@@ -969,6 +973,8 @@ gm107_grctx = { ...@@ -969,6 +973,8 @@ gm107_grctx = {
.bundle_token_limit = 0x2c0, .bundle_token_limit = 0x2c0,
.pagepool = gm107_grctx_generate_pagepool, .pagepool = gm107_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gm107_grctx_generate_attrib_cb,
.attrib = gm107_grctx_generate_attrib, .attrib = gm107_grctx_generate_attrib,
.attrib_nr_max = 0xff0, .attrib_nr_max = 0xff0,
.attrib_nr = 0xaa0, .attrib_nr = 0xaa0,
......
...@@ -111,6 +111,8 @@ gm200_grctx = { ...@@ -111,6 +111,8 @@ gm200_grctx = {
.bundle_token_limit = 0x780, .bundle_token_limit = 0x780,
.pagepool = gm107_grctx_generate_pagepool, .pagepool = gm107_grctx_generate_pagepool,
.pagepool_size = 0x20000, .pagepool_size = 0x20000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gm107_grctx_generate_attrib_cb,
.attrib = gm107_grctx_generate_attrib, .attrib = gm107_grctx_generate_attrib,
.attrib_nr_max = 0x600, .attrib_nr_max = 0x600,
.attrib_nr = 0x400, .attrib_nr = 0x400,
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "ctxgf100.h" #include "ctxgf100.h"
static void static void
gm20b_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) gm20b_grctx_generate_main(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = chan->gr; struct gf100_gr *gr = chan->gr;
struct nvkm_device *device = gr->base.engine.subdev.device; struct nvkm_device *device = gr->base.engine.subdev.device;
...@@ -36,7 +36,8 @@ gm20b_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) ...@@ -36,7 +36,8 @@ gm20b_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
grctx->attrib(info); grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
grctx->attrib(chan);
grctx->unkn(gr); grctx->unkn(gr);
...@@ -78,6 +79,8 @@ gm20b_grctx = { ...@@ -78,6 +79,8 @@ gm20b_grctx = {
.bundle_token_limit = 0x1c0, .bundle_token_limit = 0x1c0,
.pagepool = gm107_grctx_generate_pagepool, .pagepool = gm107_grctx_generate_pagepool,
.pagepool_size = 0x8000, .pagepool_size = 0x8000,
.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
.attrib_cb = gm107_grctx_generate_attrib_cb,
.attrib = gm107_grctx_generate_attrib, .attrib = gm107_grctx_generate_attrib,
.attrib_nr_max = 0x600, .attrib_nr_max = 0x600,
.attrib_nr = 0x400, .attrib_nr = 0x400,
......
...@@ -39,32 +39,21 @@ gp100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) ...@@ -39,32 +39,21 @@ gp100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
} }
static void static void
gp100_grctx_generate_attrib(struct gf100_grctx *info) gp100_grctx_generate_attrib(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = info->gr; struct gf100_gr *gr = chan->gr;
const struct gf100_grctx_func *grctx = gr->func->grctx; const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 alpha = grctx->alpha_nr; const u32 alpha = grctx->alpha_nr;
const u32 attrib = grctx->attrib_nr; const u32 attrib = grctx->attrib_nr;
const int s = 12;
const int max_batches = 0xffff; const int max_batches = 0xffff;
u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 size = grctx->alpha_nr_max * gr->tpc_total;
u32 ao = 0; u32 ao = 0;
u32 bo = ao + size; u32 bo = ao + size;
int gpc, ppc, b, n = 0; int gpc, ppc, n = 0;
for (gpc = 0; gpc < gr->gpc_nr; gpc++) gf100_grctx_patch_wr32(chan, 0x405830, attrib);
size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max; gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
size = ((size * 0x20) + 128) & ~127; gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
b = mmio_vram(info, size, (1 << s), false);
mmio_refn(info, 0x418810, 0x80000000, s, b);
mmio_refn(info, 0x419848, 0x10000000, s, b);
mmio_refn(info, 0x419c2c, 0x10000000, s, b);
mmio_refn(info, 0x419b00, 0x00000000, s, b);
mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
mmio_wr32(info, 0x405830, attrib);
mmio_wr32(info, 0x40585c, alpha);
mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
...@@ -72,21 +61,45 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) ...@@ -72,21 +61,45 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info)
const u32 bs = attrib * gr->ppc_tpc_max; const u32 bs = attrib * gr->ppc_tpc_max;
const u32 u = 0x418ea0 + (n * 0x04); const u32 u = 0x418ea0 + (n * 0x04);
const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 o = PPC_UNIT(gpc, ppc, 0);
if (!(gr->ppc_mask[gpc] & (1 << ppc))) if (!(gr->ppc_mask[gpc] & (1 << ppc)))
continue; continue;
mmio_wr32(info, o + 0xc0, bs);
mmio_wr32(info, o + 0xf4, bo); gf100_grctx_patch_wr32(chan, o + 0xc0, bs);
mmio_wr32(info, o + 0xf0, bs); gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
bo += grctx->attrib_nr_max * gr->ppc_tpc_max; bo += grctx->attrib_nr_max * gr->ppc_tpc_max;
mmio_wr32(info, o + 0xe4, as); gf100_grctx_patch_wr32(chan, o + 0xe4, as);
mmio_wr32(info, o + 0xf8, ao); gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
mmio_wr32(info, u, bs); gf100_grctx_patch_wr32(chan, u, bs);
} }
} }
mmio_wr32(info, 0x418eec, 0x00000000); gf100_grctx_patch_wr32(chan, 0x418eec, 0x00000000);
mmio_wr32(info, 0x41befc, 0x00000000); gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000000);
}
/*
 * Emit the attribute circular buffer setup for a channel.
 *
 * First defers to gm107_grctx_generate_attrib_cb() with the same arguments,
 * then issues two further gf100_grctx_patch_wr32() writes:
 *   0x419b00 <- addr >> 12
 *   0x419b04 <- (size >> 7) with bit 31 set
 *
 * @chan: channel whose patch list receives the writes
 * @addr: address of the attribute circular buffer
 * @size: size of the attribute circular buffer
 */
void
gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
{
	/* Narrowed to u32 here exactly as the u32 'data' parameter would do. */
	const u32 cb_base = 0x00000000 | addr >> 12;
	const u32 cb_size = 0x80000000 | size >> 7;

	gm107_grctx_generate_attrib_cb(chan, addr, size);

	gf100_grctx_patch_wr32(chan, 0x419b00, cb_base);
	gf100_grctx_patch_wr32(chan, 0x419b04, cb_size);
}
/*
 * Compute the size of the attribute circular buffer.
 *
 * The running total starts at alpha_nr_max * tpc_total and grows by
 * attrib_nr_max * ppc_nr * ppc_tpc_max once for every GPC.  The total is
 * then multiplied by 0x20 and aligned to 128.
 *
 * @gr: graphics engine state supplying the unit counts and grctx constants
 *
 * Returns the aligned size.
 */
static u32
gp100_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
{
const struct gf100_grctx_func *grctx = gr->func->grctx;
u32 size = grctx->alpha_nr_max * gr->tpc_total;
int gpc;
/* The added term does not depend on gpc, so it is simply added gpc_nr times. */
for (gpc = 0; gpc < gr->gpc_nr; gpc++)
size += grctx->attrib_nr_max * gr->func->ppc_nr * gr->ppc_tpc_max;
/* Adds 128 (not 127) before masking: a value already on a 128 boundary still grows by 128. */
return ((size * 0x20) + 128) & ~127;
} }
void void
...@@ -120,6 +133,8 @@ gp100_grctx = { ...@@ -120,6 +133,8 @@ gp100_grctx = {
.bundle_token_limit = 0x1080, .bundle_token_limit = 0x1080,
.pagepool = gp100_grctx_generate_pagepool, .pagepool = gp100_grctx_generate_pagepool,
.pagepool_size = 0x20000, .pagepool_size = 0x20000,
.attrib_cb_size = gp100_grctx_generate_attrib_cb_size,
.attrib_cb = gp100_grctx_generate_attrib_cb,
.attrib = gp100_grctx_generate_attrib, .attrib = gp100_grctx_generate_attrib,
.attrib_nr_max = 0x660, .attrib_nr_max = 0x660,
.attrib_nr = 0x440, .attrib_nr = 0x440,
......
...@@ -37,33 +37,22 @@ gp102_grctx_generate_r408840(struct gf100_gr *gr) ...@@ -37,33 +37,22 @@ gp102_grctx_generate_r408840(struct gf100_gr *gr)
} }
void void
gp102_grctx_generate_attrib(struct gf100_grctx *info) gp102_grctx_generate_attrib(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = info->gr; struct gf100_gr *gr = chan->gr;
const struct gf100_grctx_func *grctx = gr->func->grctx; const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 alpha = grctx->alpha_nr; const u32 alpha = grctx->alpha_nr;
const u32 attrib = grctx->attrib_nr; const u32 attrib = grctx->attrib_nr;
const u32 gfxp = grctx->gfxp_nr; const u32 gfxp = grctx->gfxp_nr;
const int s = 12;
const int max_batches = 0xffff; const int max_batches = 0xffff;
u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 size = grctx->alpha_nr_max * gr->tpc_total;
u32 ao = 0; u32 ao = 0;
u32 bo = ao + size; u32 bo = ao + size;
int gpc, ppc, b, n = 0; int gpc, ppc, n = 0;
for (gpc = 0; gpc < gr->gpc_nr; gpc++) gf100_grctx_patch_wr32(chan, 0x405830, attrib);
size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
size = ((size * 0x20) + 128) & ~127; gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
b = mmio_vram(info, size, (1 << s), false);
mmio_refn(info, 0x418810, 0x80000000, s, b);
mmio_refn(info, 0x419848, 0x10000000, s, b);
mmio_refn(info, 0x419c2c, 0x10000000, s, b);
mmio_refn(info, 0x419b00, 0x00000000, s, b);
mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
mmio_wr32(info, 0x405830, attrib);
mmio_wr32(info, 0x40585c, alpha);
mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
...@@ -73,22 +62,37 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) ...@@ -73,22 +62,37 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info)
const u32 u = 0x418ea0 + (n * 0x04); const u32 u = 0x418ea0 + (n * 0x04);
const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 o = PPC_UNIT(gpc, ppc, 0);
const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4)); const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4));
if (!(gr->ppc_mask[gpc] & (1 << ppc))) if (!(gr->ppc_mask[gpc] & (1 << ppc)))
continue; continue;
mmio_wr32(info, o + 0xc0, gs);
mmio_wr32(info, p, bs); gf100_grctx_patch_wr32(chan, o + 0xc0, gs);
mmio_wr32(info, o + 0xf4, bo); gf100_grctx_patch_wr32(chan, p, bs);
mmio_wr32(info, o + 0xf0, bs); gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
bo += gs; bo += gs;
mmio_wr32(info, o + 0xe4, as); gf100_grctx_patch_wr32(chan, o + 0xe4, as);
mmio_wr32(info, o + 0xf8, ao); gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
mmio_wr32(info, u, bs); gf100_grctx_patch_wr32(chan, u, bs);
} }
} }
mmio_wr32(info, 0x4181e4, 0x00000100); gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100);
mmio_wr32(info, 0x41befc, 0x00000100); gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100);
}
/*
 * Compute the size of the attribute circular buffer.
 *
 * The running total starts at alpha_nr_max * tpc_total and grows by
 * gfxp_nr * ppc_nr * ppc_tpc_max once for every GPC.  The total is then
 * multiplied by 0x20 and rounded up to a multiple of 128.
 *
 * @gr: graphics engine state supplying the unit counts and grctx constants
 *
 * Returns the rounded-up size.
 */
u32
gp102_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
{
const struct gf100_grctx_func *grctx = gr->func->grctx;
u32 size = grctx->alpha_nr_max * gr->tpc_total;
int gpc;
/* The added term does not depend on gpc, so it is simply added gpc_nr times. */
for (gpc = 0; gpc < gr->gpc_nr; gpc++)
size += grctx->gfxp_nr * gr->func->ppc_nr * gr->ppc_tpc_max;
/* Conventional round-up: a value already on a 128 boundary is returned unchanged. */
return ((size * 0x20) + 127) & ~127;
} }
const struct gf100_grctx_func const struct gf100_grctx_func
...@@ -101,6 +105,8 @@ gp102_grctx = { ...@@ -101,6 +105,8 @@ gp102_grctx = {
.bundle_token_limit = 0x900, .bundle_token_limit = 0x900,
.pagepool = gp100_grctx_generate_pagepool, .pagepool = gp100_grctx_generate_pagepool,
.pagepool_size = 0x20000, .pagepool_size = 0x20000,
.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
.attrib_cb = gp100_grctx_generate_attrib_cb,
.attrib = gp102_grctx_generate_attrib, .attrib = gp102_grctx_generate_attrib,
.attrib_nr_max = 0x4b0, .attrib_nr_max = 0x4b0,
.attrib_nr = 0x320, .attrib_nr = 0x320,
......
...@@ -31,6 +31,8 @@ gp104_grctx = { ...@@ -31,6 +31,8 @@ gp104_grctx = {
.bundle_token_limit = 0x900, .bundle_token_limit = 0x900,
.pagepool = gp100_grctx_generate_pagepool, .pagepool = gp100_grctx_generate_pagepool,
.pagepool_size = 0x20000, .pagepool_size = 0x20000,
.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
.attrib_cb = gp100_grctx_generate_attrib_cb,
.attrib = gp102_grctx_generate_attrib, .attrib = gp102_grctx_generate_attrib,
.attrib_nr_max = 0x4b0, .attrib_nr_max = 0x4b0,
.attrib_nr = 0x320, .attrib_nr = 0x320,
......
...@@ -39,6 +39,8 @@ gp107_grctx = { ...@@ -39,6 +39,8 @@ gp107_grctx = {
.bundle_token_limit = 0x300, .bundle_token_limit = 0x300,
.pagepool = gp100_grctx_generate_pagepool, .pagepool = gp100_grctx_generate_pagepool,
.pagepool_size = 0x20000, .pagepool_size = 0x20000,
.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
.attrib_cb = gp100_grctx_generate_attrib_cb,
.attrib = gp102_grctx_generate_attrib, .attrib = gp102_grctx_generate_attrib,
.attrib_nr_max = 0x15de, .attrib_nr_max = 0x15de,
.attrib_nr = 0x540, .attrib_nr = 0x540,
......
...@@ -59,31 +59,20 @@ gv100_grctx_pack_sw_veid_bundle_init[] = { ...@@ -59,31 +59,20 @@ gv100_grctx_pack_sw_veid_bundle_init[] = {
}; };
void void
gv100_grctx_generate_attrib(struct gf100_grctx *info) gv100_grctx_generate_attrib(struct gf100_gr_chan *chan)
{ {
struct gf100_gr *gr = info->gr; struct gf100_gr *gr = chan->gr;
const struct gf100_grctx_func *grctx = gr->func->grctx; const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 alpha = grctx->alpha_nr; const u32 alpha = grctx->alpha_nr;
const u32 attrib = grctx->attrib_nr; const u32 attrib = grctx->attrib_nr;
const u32 gfxp = grctx->gfxp_nr; const u32 gfxp = grctx->gfxp_nr;
const int s = 12;
u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 size = grctx->alpha_nr_max * gr->tpc_total;
u32 ao = 0; u32 ao = 0;
u32 bo = ao + size; u32 bo = ao + size;
int gpc, ppc, b, n = 0; int gpc, ppc, n = 0;
for (gpc = 0; gpc < gr->gpc_nr; gpc++) gf100_grctx_patch_wr32(chan, 0x405830, attrib);
size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
size = ((size * 0x20) + 127) & ~127;
b = mmio_vram(info, size, (1 << s), false);
mmio_refn(info, 0x418810, 0x80000000, s, b);
mmio_refn(info, 0x419848, 0x10000000, s, b);
mmio_refn(info, 0x419c2c, 0x10000000, s, b);
mmio_refn(info, 0x419e00, 0x00000000, s, b);
mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7);
mmio_wr32(info, 0x405830, attrib);
mmio_wr32(info, 0x40585c, alpha);
for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
...@@ -92,21 +81,32 @@ gv100_grctx_generate_attrib(struct gf100_grctx *info) ...@@ -92,21 +81,32 @@ gv100_grctx_generate_attrib(struct gf100_grctx *info)
const u32 gs = gfxp * gr->ppc_tpc_max; const u32 gs = gfxp * gr->ppc_tpc_max;
const u32 u = 0x418ea0 + (n * 0x04); const u32 u = 0x418ea0 + (n * 0x04);
const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 o = PPC_UNIT(gpc, ppc, 0);
if (!(gr->ppc_mask[gpc] & (1 << ppc))) if (!(gr->ppc_mask[gpc] & (1 << ppc)))
continue; continue;
mmio_wr32(info, o + 0xc0, gs);
mmio_wr32(info, o + 0xf4, bo); gf100_grctx_patch_wr32(chan, o + 0xc0, gs);
mmio_wr32(info, o + 0xf0, bs); gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
bo += gs; bo += gs;
mmio_wr32(info, o + 0xe4, as); gf100_grctx_patch_wr32(chan, o + 0xe4, as);
mmio_wr32(info, o + 0xf8, ao); gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
mmio_wr32(info, u, bs); gf100_grctx_patch_wr32(chan, u, bs);
} }
} }
mmio_wr32(info, 0x4181e4, 0x00000100); gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100);
mmio_wr32(info, 0x41befc, 0x00000100); gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100);
}
/*
 * Emit the attribute circular buffer setup for a channel.
 *
 * Calls gm107_grctx_generate_attrib_cb() with the same arguments, then
 * issues two further gf100_grctx_patch_wr32() writes to 0x419e00 and
 * 0x419e04.
 *
 * @chan: channel whose patch list receives the writes
 * @addr: address of the attribute circular buffer
 * @size: size of the attribute circular buffer
 */
void
gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
{
gm107_grctx_generate_attrib_cb(chan, addr, size);
/* Shift binds tighter than '|': the value written is addr >> 12. */
gf100_grctx_patch_wr32(chan, 0x419e00, 0x00000000 | addr >> 12);
/* size >> 7 with bit 31 set. */
gf100_grctx_patch_wr32(chan, 0x419e04, 0x80000000 | size >> 7);
} }
void void
...@@ -198,6 +198,8 @@ gv100_grctx = { ...@@ -198,6 +198,8 @@ gv100_grctx = {
.bundle_token_limit = 0x1680, .bundle_token_limit = 0x1680,
.pagepool = gp100_grctx_generate_pagepool, .pagepool = gp100_grctx_generate_pagepool,
.pagepool_size = 0x20000, .pagepool_size = 0x20000,
.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
.attrib_cb = gv100_grctx_generate_attrib_cb,
.attrib = gv100_grctx_generate_attrib, .attrib = gv100_grctx_generate_attrib,
.attrib_nr_max = 0x6c0, .attrib_nr_max = 0x6c0,
.attrib_nr = 0x480, .attrib_nr = 0x480,
......
...@@ -74,6 +74,8 @@ tu102_grctx = { ...@@ -74,6 +74,8 @@ tu102_grctx = {
.bundle_token_limit = 0xa80, .bundle_token_limit = 0xa80,
.pagepool = gp100_grctx_generate_pagepool, .pagepool = gp100_grctx_generate_pagepool,
.pagepool_size = 0x20000, .pagepool_size = 0x20000,
.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
.attrib_cb = gv100_grctx_generate_attrib_cb,
.attrib = gv100_grctx_generate_attrib, .attrib = gv100_grctx_generate_attrib,
.attrib_nr_max = 0x800, .attrib_nr_max = 0x800,
.attrib_nr = 0x700, .attrib_nr = 0x700,
......
...@@ -355,16 +355,11 @@ static void * ...@@ -355,16 +355,11 @@ static void *
gf100_gr_chan_dtor(struct nvkm_object *object) gf100_gr_chan_dtor(struct nvkm_object *object)
{ {
struct gf100_gr_chan *chan = gf100_gr_chan(object); struct gf100_gr_chan *chan = gf100_gr_chan(object);
int i;
for (i = 0; i < ARRAY_SIZE(chan->data); i++) {
nvkm_vmm_put(chan->vmm, &chan->data[i].vma);
nvkm_memory_unref(&chan->data[i].mem);
}
nvkm_vmm_put(chan->vmm, &chan->mmio_vma); nvkm_vmm_put(chan->vmm, &chan->mmio_vma);
nvkm_memory_unref(&chan->mmio); nvkm_memory_unref(&chan->mmio);
nvkm_vmm_put(chan->vmm, &chan->attrib_cb);
nvkm_vmm_put(chan->vmm, &chan->unknown); nvkm_vmm_put(chan->vmm, &chan->unknown);
nvkm_vmm_put(chan->vmm, &chan->bundle_cb); nvkm_vmm_put(chan->vmm, &chan->bundle_cb);
nvkm_vmm_put(chan->vmm, &chan->pagepool); nvkm_vmm_put(chan->vmm, &chan->pagepool);
...@@ -384,12 +379,10 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, ...@@ -384,12 +379,10 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
struct nvkm_object **pobject) struct nvkm_object **pobject)
{ {
struct gf100_gr *gr = gf100_gr(base); struct gf100_gr *gr = gf100_gr(base);
struct gf100_gr_data *data = gr->mmio_data;
struct gf100_gr_mmio *mmio = gr->mmio_list;
struct gf100_gr_chan *chan; struct gf100_gr_chan *chan;
struct gf100_vmm_map_v0 args = { .priv = 1 }; struct gf100_vmm_map_v0 args = { .priv = 1 };
struct nvkm_device *device = gr->base.engine.subdev.device; struct nvkm_device *device = gr->base.engine.subdev.device;
int ret, i; int ret;
if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
return -ENOMEM; return -ENOMEM;
...@@ -416,6 +409,22 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, ...@@ -416,6 +409,22 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
if (ret) if (ret)
return ret; return ret;
/* Map attribute circular buffer. */
ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->attrib_cb), &chan->attrib_cb);
if (ret)
return ret;
if (device->card_type < GP100) {
ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, NULL, 0);
if (ret)
return ret;
} else {
ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb,
&args, sizeof(args));;
if (ret)
return ret;
}
/* Map some context buffer of unknown purpose. */ /* Map some context buffer of unknown purpose. */
if (gr->func->grctx->unknown_size) { if (gr->func->grctx->unknown_size) {
ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->unknown), &chan->unknown); ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->unknown), &chan->unknown);
...@@ -457,47 +466,12 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, ...@@ -457,47 +466,12 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
if (ret) if (ret)
return ret; return ret;
/* allocate buffers referenced by mmio list */
for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
data->size, data->align, false,
&chan->data[i].mem);
if (ret)
return ret;
ret = nvkm_vmm_get(fifoch->vmm, 12,
nvkm_memory_size(chan->data[i].mem),
&chan->data[i].vma);
if (ret)
return ret;
args.priv = data->priv;
ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm,
chan->data[i].vma, &args, sizeof(args));
if (ret)
return ret;
data++;
}
/* finally, fill in the mmio list and point the context at it */ /* finally, fill in the mmio list and point the context at it */
nvkm_kmap(chan->mmio); nvkm_kmap(chan->mmio);
gr->func->grctx->pagepool(chan, chan->pagepool->addr); gr->func->grctx->pagepool(chan, chan->pagepool->addr);
gr->func->grctx->bundle(chan, chan->bundle_cb->addr, gr->func->grctx->bundle_size); gr->func->grctx->bundle(chan, chan->bundle_cb->addr, gr->func->grctx->bundle_size);
for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) { gr->func->grctx->attrib_cb(chan, chan->attrib_cb->addr, gr->func->grctx->attrib_cb_size(gr));
u32 addr = mmio->addr; gr->func->grctx->attrib(chan);
u32 data = mmio->data;
if (mmio->buffer >= 0) {
u64 info = chan->data[mmio->buffer].vma->addr;
data |= info >> mmio->shift;
}
nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
mmio++;
}
if (gr->func->grctx->patch_ltc) if (gr->func->grctx->patch_ltc)
gr->func->grctx->patch_ltc(chan); gr->func->grctx->patch_ltc(chan);
if (gr->func->grctx->unknown_size) if (gr->func->grctx->unknown_size)
...@@ -2015,6 +1989,11 @@ gf100_gr_oneinit(struct nvkm_gr *base) ...@@ -2015,6 +1989,11 @@ gf100_gr_oneinit(struct nvkm_gr *base)
if (ret) if (ret)
return ret; return ret;
ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->attrib_cb_size(gr),
0x1000, false, &gr->attrib_cb);
if (ret)
return ret;
if (gr->func->grctx->unknown_size) { if (gr->func->grctx->unknown_size) {
ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->unknown_size, ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->unknown_size,
0x100, false, &gr->unknown); 0x100, false, &gr->unknown);
...@@ -2092,6 +2071,7 @@ gf100_gr_dtor(struct nvkm_gr *base) ...@@ -2092,6 +2071,7 @@ gf100_gr_dtor(struct nvkm_gr *base)
kfree(gr->data); kfree(gr->data);
nvkm_memory_unref(&gr->unknown); nvkm_memory_unref(&gr->unknown);
nvkm_memory_unref(&gr->attrib_cb);
nvkm_memory_unref(&gr->bundle_cb); nvkm_memory_unref(&gr->bundle_cb);
nvkm_memory_unref(&gr->pagepool); nvkm_memory_unref(&gr->pagepool);
......
...@@ -44,19 +44,6 @@ struct nvkm_acr_lsfw; ...@@ -44,19 +44,6 @@ struct nvkm_acr_lsfw;
#define PPC_UNIT(t, m, r) (0x503000 + (t) * 0x8000 + (m) * 0x200 + (r)) #define PPC_UNIT(t, m, r) (0x503000 + (t) * 0x8000 + (m) * 0x200 + (r))
#define TPC_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r)) #define TPC_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r))
struct gf100_gr_data {
u32 size;
u32 align;
bool priv;
};
struct gf100_gr_mmio {
u32 addr;
u32 data;
u32 shift;
int buffer;
};
struct gf100_gr_zbc_color { struct gf100_gr_zbc_color {
u32 format; u32 format;
u32 ds[4]; u32 ds[4];
...@@ -123,6 +110,7 @@ struct gf100_gr { ...@@ -123,6 +110,7 @@ struct gf100_gr {
struct nvkm_memory *pagepool; struct nvkm_memory *pagepool;
struct nvkm_memory *bundle_cb; struct nvkm_memory *bundle_cb;
struct nvkm_memory *attrib_cb;
struct nvkm_memory *unknown; struct nvkm_memory *unknown;
u8 screen_tile_row_offset; u8 screen_tile_row_offset;
...@@ -134,8 +122,6 @@ struct gf100_gr { ...@@ -134,8 +122,6 @@ struct gf100_gr {
} sm[TPC_MAX]; } sm[TPC_MAX];
u8 sm_nr; u8 sm_nr;
struct gf100_gr_data mmio_data[4];
struct gf100_gr_mmio mmio_list[4096/8];
u32 size; u32 size;
u32 *data; u32 *data;
u32 size_zcull; u32 size_zcull;
...@@ -264,16 +250,12 @@ struct gf100_gr_chan { ...@@ -264,16 +250,12 @@ struct gf100_gr_chan {
struct nvkm_vma *pagepool; struct nvkm_vma *pagepool;
struct nvkm_vma *bundle_cb; struct nvkm_vma *bundle_cb;
struct nvkm_vma *attrib_cb;
struct nvkm_vma *unknown; struct nvkm_vma *unknown;
struct nvkm_memory *mmio; struct nvkm_memory *mmio;
struct nvkm_vma *mmio_vma; struct nvkm_vma *mmio_vma;
int mmio_nr; int mmio_nr;
struct {
struct nvkm_memory *mem;
struct nvkm_vma *vma;
} data[4];
}; };
void gf100_gr_ctxctl_debug(struct gf100_gr *); void gf100_gr_ctxctl_debug(struct gf100_gr *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment