drm/nouveau/gr/gf117-: make ppc_nr[gpc] accurate

We're going to be pulling in a chunk of code from NVGPU to fixup our SMID mappings on Volta and above, which depends on ppc_nr[gpc] reflecting the actual number of PPCs present, not the maximum number. Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Lyude Paul <lyude@redhat.com>

drm/nouveau/gr/gf117-: make ppc_nr[gpc] accurate
We're going to be pulling in a chunk of code from NVGPU to fixup our SMID mappings on Volta and above, which depends on ppc_nr[gpc] reflecting the actual number of PPCs present, not the maximum number. Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Lyude Paul <lyude@redhat.com>
0c520ad4 · Ben Skeggs · 9aa3face · 0c520ad4 · 0c520ad4 · 0c520ad4
Commit 0c520ad4 authored Jun 01, 2022 by Ben Skeggs
8 changed files
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -257,7 +257,7 @@ gf117_grctx_generate_attrib(struct gf100_gr_chan *chan)
 	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);

 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
 			const u32 a = alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 b =  beta * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 t = timeslice_mode;

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -912,7 +912,7 @@ gm107_grctx_generate_attrib(struct gf100_gr_chan *chan)
 	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);

 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 u = 0x418ea0 + (n * 0x04);

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
@@ -87,7 +87,7 @@ gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr)
 	int gpc, ppc, i;

 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
 			u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc];
 			u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc];
 			while (ppc_tpcs-- > gr->ppc_tpc_min)

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -56,7 +56,7 @@ gp100_grctx_generate_attrib(struct gf100_gr_chan *chan)
 	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);

 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 bs = attrib * gr->ppc_tpc_max;
 			const u32 u = 0x418ea0 + (n * 0x04);

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
@@ -55,7 +55,7 @@ gp102_grctx_generate_attrib(struct gf100_gr_chan *chan)
 	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);

 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 bs = attrib * gr->ppc_tpc_max;
 			const u32 gs =   gfxp * gr->ppc_tpc_max;

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
@@ -75,7 +75,7 @@ gv100_grctx_generate_attrib(struct gf100_gr_chan *chan)
 	gf100_grctx_patch_wr32(chan, 0x40585c, alpha);

 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 bs = attrib * gr->ppc_tpc_max;
 			const u32 gs =   gfxp * gr->ppc_tpc_max;

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -2003,12 +2003,14 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
 		gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]);
 		gr->tpc_total += gr->tpc_nr[i];
-		gr->ppc_nr[i]  = gr->func->ppc_nr;
-		for (j = 0; j < gr->ppc_nr[i]; j++) {
+		for (j = 0; j < gr->func->ppc_nr; j++) {
 			gr->ppc_tpc_mask[i][j] =
 				nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
 			if (gr->ppc_tpc_mask[i][j] == 0)
 				continue;
+
+			gr->ppc_nr[i]++;
+
 			gr->ppc_mask[i] |= (1 << j);
 			gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]);
 			if (gr->ppc_tpc_min == 0 ||

--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
@@ -418,7 +418,7 @@ gk104_gr_init_ppc_exceptions(struct gf100_gr *gr)
 	int gpc, ppc;

 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
 			nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);