Commit 97e5268d authored by Ben Skeggs's avatar Ben Skeggs

drm/nouveau/fb/gf100-: rework ram detection

This commit reworks the RAM detection algorithm, using RAM-per-LTC to
determine whether a board has a mixed-memory configuration instead of
using RAM-per-FBPA.  I'm not certain the algorithm is perfect, but it
should handle all currently known configurations in the very least.

This should fix GTX 970 boards with 4GiB of RAM where the last 512MiB
isn't fully accessible, as well as only detecting half the VRAM on
GF108 boards.

As a nice side-effect, GP10x memory detection now reuses the majority
of the code from earlier chipsets.
Signed-off-by: default avatarBen Skeggs <bskeggs@redhat.com>
parent ba4c063d
......@@ -147,6 +147,12 @@ struct nvkm_ram {
};
struct nvkm_ram_func {
u64 upper;
u32 (*probe_fbp)(const struct nvkm_ram_func *, struct nvkm_device *,
int fbp, int *pltcs);
u32 (*probe_fbp_amount)(const struct nvkm_ram_func *, u32 fbpao,
struct nvkm_device *, int fbp, int *pltcs);
u32 (*probe_fbpa_amount)(struct nvkm_device *, int fbpa);
void *(*dtor)(struct nvkm_ram *);
int (*init)(struct nvkm_ram *);
......
......@@ -22,7 +22,12 @@ void __nv50_ram_put(struct nvkm_ram *, struct nvkm_mem *);
int gf100_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *,
struct nvkm_ram **);
int gf100_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *,
u32, struct nvkm_ram *);
struct nvkm_ram *);
u32 gf100_ram_probe_fbp(const struct nvkm_ram_func *,
struct nvkm_device *, int, int *);
u32 gf100_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32,
struct nvkm_device *, int, int *);
u32 gf100_ram_probe_fbpa_amount(struct nvkm_device *, int);
int gf100_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **);
void gf100_ram_put(struct nvkm_ram *, struct nvkm_mem **);
int gf100_ram_init(struct nvkm_ram *);
......@@ -30,14 +35,23 @@ int gf100_ram_calc(struct nvkm_ram *, u32);
int gf100_ram_prog(struct nvkm_ram *);
void gf100_ram_tidy(struct nvkm_ram *);
u32 gf108_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32,
struct nvkm_device *, int, int *);
int gk104_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *,
struct nvkm_ram **, u32);
struct nvkm_ram **);
void *gk104_ram_dtor(struct nvkm_ram *);
int gk104_ram_init(struct nvkm_ram *);
int gk104_ram_calc(struct nvkm_ram *, u32);
int gk104_ram_prog(struct nvkm_ram *);
void gk104_ram_tidy(struct nvkm_ram *);
u32 gm107_ram_probe_fbp(const struct nvkm_ram_func *,
struct nvkm_device *, int, int *);
u32 gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32,
struct nvkm_device *, int, int *);
/* RAM type-specific MR calculation routines */
int nvkm_sddr2_calc(struct nvkm_ram *);
int nvkm_sddr3_calc(struct nvkm_ram *);
......
......@@ -543,67 +543,96 @@ gf100_ram_init(struct nvkm_ram *base)
return 0;
}
u32
gf100_ram_probe_fbpa_amount(struct nvkm_device *device, int fbpa)
{
return nvkm_rd32(device, 0x11020c + (fbpa * 0x1000));
}
u32
gf100_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao,
struct nvkm_device *device, int fbp, int *pltcs)
{
if (!(fbpao & BIT(fbp))) {
*pltcs = 1;
return func->probe_fbpa_amount(device, fbp);
}
return 0;
}
u32
gf100_ram_probe_fbp(const struct nvkm_ram_func *func,
struct nvkm_device *device, int fbp, int *pltcs)
{
u32 fbpao = nvkm_rd32(device, 0x022554);
return func->probe_fbp_amount(func, fbpao, device, fbp, pltcs);
}
int
gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
u32 maskaddr, struct nvkm_ram *ram)
struct nvkm_ram *ram)
{
struct nvkm_subdev *subdev = &fb->subdev;
struct nvkm_device *device = subdev->device;
struct nvkm_bios *bios = device->bios;
const u32 rsvd_head = ( 256 * 1024); /* vga memory */
const u32 rsvd_tail = (1024 * 1024); /* vbios etc */
u32 parts = nvkm_rd32(device, 0x022438);
u32 pmask = nvkm_rd32(device, maskaddr);
u64 bsize = (u64)nvkm_rd32(device, 0x10f20c) << 20;
u64 psize, size = 0;
enum nvkm_ram_type type = nvkm_fb_bios_memtype(bios);
bool uniform = true;
int ret, i;
nvkm_debug(subdev, "100800: %08x\n", nvkm_rd32(device, 0x100800));
nvkm_debug(subdev, "parts %08x mask %08x\n", parts, pmask);
/* read amount of vram attached to each memory controller */
for (i = 0; i < parts; i++) {
if (pmask & (1 << i))
continue;
psize = (u64)nvkm_rd32(device, 0x11020c + (i * 0x1000)) << 20;
if (psize != bsize) {
if (psize < bsize)
bsize = psize;
uniform = false;
u32 fbps = nvkm_rd32(device, 0x022438);
u64 total = 0, lcomm = ~0, lower, ubase, usize;
int ret, fbp, ltcs, ltcn = 0;
nvkm_debug(subdev, "%d FBP(s)\n", fbps);
for (fbp = 0; fbp < fbps; fbp++) {
u32 size = func->probe_fbp(func, device, fbp, &ltcs);
if (size) {
nvkm_debug(subdev, "FBP %d: %4d MiB, %d LTC(s)\n",
fbp, size, ltcs);
lcomm = min(lcomm, (u64)(size / ltcs) << 20);
total += size << 20;
ltcn += ltcs;
} else {
nvkm_debug(subdev, "FBP %d: disabled\n", fbp);
}
nvkm_debug(subdev, "%d: %d MiB\n", i, (u32)(psize >> 20));
size += psize;
}
ret = nvkm_ram_ctor(func, fb, type, size, 0, ram);
lower = lcomm * ltcn;
ubase = lcomm + func->upper;
usize = total - lower;
nvkm_debug(subdev, "Lower: %4lld MiB @ %010llx\n", lower >> 20, 0ULL);
nvkm_debug(subdev, "Upper: %4lld MiB @ %010llx\n", usize >> 20, ubase);
nvkm_debug(subdev, "Total: %4lld MiB\n", total >> 20);
ret = nvkm_ram_ctor(func, fb, type, total, 0, ram);
if (ret)
return ret;
nvkm_mm_fini(&ram->vram);
/* if all controllers have the same amount attached, there's no holes */
if (uniform) {
/* Some GPUs are in what's known as a "mixed memory" configuration.
*
* This is either where some FBPs have more memory than the others,
* or where LTCs have been disabled on a FBP.
*/
if (lower != total) {
/* The common memory amount is addressed normally. */
ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
(size - rsvd_head - rsvd_tail) >>
NVKM_RAM_MM_SHIFT, 1);
(lower - rsvd_head) >> NVKM_RAM_MM_SHIFT, 1);
if (ret)
return ret;
} else {
/* otherwise, address lowest common amount from 0GiB */
ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
((bsize * parts) - rsvd_head) >>
NVKM_RAM_MM_SHIFT, 1);
/* And the rest is much higher in the physical address
* space, and may not be usable for certain operations.
*/
ret = nvkm_mm_init(&ram->vram, ubase >> NVKM_RAM_MM_SHIFT,
(usize - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1);
if (ret)
return ret;
/* and the rest starting from (8GiB + common_size) */
ret = nvkm_mm_init(&ram->vram, (0x0200000000ULL + bsize) >>
NVKM_RAM_MM_SHIFT,
(size - (bsize * parts) - rsvd_tail) >>
} else {
/* GPUs without mixed-memory are a lot nicer... */
ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
(total - rsvd_head - rsvd_tail) >>
NVKM_RAM_MM_SHIFT, 1);
if (ret)
return ret;
......@@ -626,7 +655,7 @@ gf100_ram_new_(const struct nvkm_ram_func *func,
return -ENOMEM;
*pram = &ram->base;
ret = gf100_ram_ctor(func, fb, 0x022554, &ram->base);
ret = gf100_ram_ctor(func, fb, &ram->base);
if (ret)
return ret;
......@@ -705,6 +734,10 @@ gf100_ram_new_(const struct nvkm_ram_func *func,
static const struct nvkm_ram_func
gf100_ram = {
.upper = 0x0200000000,
.probe_fbp = gf100_ram_probe_fbp,
.probe_fbp_amount = gf100_ram_probe_fbp_amount,
.probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
.init = gf100_ram_init,
.get = gf100_ram_get,
.put = gf100_ram_put,
......
......@@ -23,8 +23,30 @@
*/
#include "ram.h"
u32
gf108_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao,
struct nvkm_device *device, int fbp, int *pltcs)
{
u32 fbpt = nvkm_rd32(device, 0x022438);
u32 fbpat = nvkm_rd32(device, 0x02243c);
u32 fbpas = fbpat / fbpt;
u32 fbpa = fbp * fbpas;
u32 size = 0;
while (fbpas--) {
if (!(fbpao & BIT(fbpa)))
size += func->probe_fbpa_amount(device, fbpa);
fbpa++;
}
*pltcs = 1;
return size;
}
static const struct nvkm_ram_func
gf108_ram = {
.upper = 0x0200000000,
.probe_fbp = gf100_ram_probe_fbp,
.probe_fbp_amount = gf108_ram_probe_fbp_amount,
.probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
.init = gf100_ram_init,
.get = gf100_ram_get,
.put = gf100_ram_put,
......
......@@ -1524,7 +1524,7 @@ gk104_ram_dtor(struct nvkm_ram *base)
int
gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
struct nvkm_ram **pram, u32 maskaddr)
struct nvkm_ram **pram)
{
struct nvkm_subdev *subdev = &fb->subdev;
struct nvkm_device *device = subdev->device;
......@@ -1539,7 +1539,7 @@ gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
return -ENOMEM;
*pram = &ram->base;
ret = gf100_ram_ctor(func, fb, maskaddr, &ram->base);
ret = gf100_ram_ctor(func, fb, &ram->base);
if (ret)
return ret;
......@@ -1703,6 +1703,10 @@ gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
static const struct nvkm_ram_func
gk104_ram = {
.upper = 0x0200000000,
.probe_fbp = gf100_ram_probe_fbp,
.probe_fbp_amount = gf108_ram_probe_fbp_amount,
.probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
.dtor = gk104_ram_dtor,
.init = gk104_ram_init,
.get = gf100_ram_get,
......@@ -1715,5 +1719,5 @@ gk104_ram = {
int
gk104_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
{
return gk104_ram_new_(&gk104_ram, fb, pram, 0x022554);
return gk104_ram_new_(&gk104_ram, fb, pram);
}
......@@ -23,8 +23,20 @@
*/
#include "ram.h"
u32
gm107_ram_probe_fbp(const struct nvkm_ram_func *func,
struct nvkm_device *device, int fbp, int *pltcs)
{
u32 fbpao = nvkm_rd32(device, 0x021c14);
return func->probe_fbp_amount(func, fbpao, device, fbp, pltcs);
}
static const struct nvkm_ram_func
gm107_ram = {
.upper = 0x1000000000,
.probe_fbp = gm107_ram_probe_fbp,
.probe_fbp_amount = gf108_ram_probe_fbp_amount,
.probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
.dtor = gk104_ram_dtor,
.init = gk104_ram_init,
.get = gf100_ram_get,
......@@ -37,5 +49,5 @@ gm107_ram = {
int
gm107_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
{
return gk104_ram_new_(&gm107_ram, fb, pram, 0x021c14);
return gk104_ram_new_(&gm107_ram, fb, pram);
}
......@@ -23,8 +23,35 @@
*/
#include "ram.h"
u32
gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao,
struct nvkm_device *device, int fbp, int *pltcs)
{
u32 ltcs = nvkm_rd32(device, 0x022450);
u32 fbpas = nvkm_rd32(device, 0x022458);
u32 fbpa = fbp * fbpas;
u32 size = 0;
if (!(nvkm_rd32(device, 0x021d38) & BIT(fbp))) {
u32 ltco = nvkm_rd32(device, 0x021d70 + (fbp * 4));
u32 ltcm = ~ltco & ((1 << ltcs) - 1);
while (fbpas--) {
if (!(fbpao & (1 << fbpa)))
size += func->probe_fbpa_amount(device, fbpa);
fbpa++;
}
*pltcs = hweight32(ltcm);
}
return size;
}
static const struct nvkm_ram_func
gm200_ram = {
.upper = 0x1000000000,
.probe_fbp = gm107_ram_probe_fbp,
.probe_fbp_amount = gm200_ram_probe_fbp_amount,
.probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
.dtor = gk104_ram_dtor,
.init = gk104_ram_init,
.get = gf100_ram_get,
......@@ -37,5 +64,5 @@ gm200_ram = {
int
gm200_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
{
return gk104_ram_new_(&gm200_ram, fb, pram, 0x021c14);
return gk104_ram_new_(&gm200_ram, fb, pram);
}
......@@ -76,8 +76,18 @@ gp100_ram_init(struct nvkm_ram *ram)
return 0;
}
static u32
gp100_ram_probe_fbpa(struct nvkm_device *device, int fbpa)
{
return nvkm_rd32(device, 0x90020c + (fbpa * 0x4000));
}
static const struct nvkm_ram_func
gp100_ram_func = {
gp100_ram = {
.upper = 0x1000000000,
.probe_fbp = gm107_ram_probe_fbp,
.probe_fbp_amount = gm200_ram_probe_fbp_amount,
.probe_fbpa_amount = gp100_ram_probe_fbpa,
.init = gp100_ram_init,
.get = gf100_ram_get,
.put = gf100_ram_put,
......@@ -87,60 +97,10 @@ int
gp100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
{
struct nvkm_ram *ram;
struct nvkm_subdev *subdev = &fb->subdev;
struct nvkm_device *device = subdev->device;
enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios);
const u32 rsvd_head = ( 256 * 1024); /* vga memory */
const u32 rsvd_tail = (1024 * 1024); /* vbios etc */
u32 fbpa_num = nvkm_rd32(device, 0x02243c), fbpa;
u32 fbio_opt = nvkm_rd32(device, 0x021c14);
u64 part, size = 0, comm = ~0ULL;
bool mixed = false;
int ret;
nvkm_debug(subdev, "02243c: %08x\n", fbpa_num);
nvkm_debug(subdev, "021c14: %08x\n", fbio_opt);
for (fbpa = 0; fbpa < fbpa_num; fbpa++) {
if (!(fbio_opt & (1 << fbpa))) {
part = nvkm_rd32(device, 0x90020c + (fbpa * 0x4000));
nvkm_debug(subdev, "fbpa %02x: %lld MiB\n", fbpa, part);
part = part << 20;
if (part != comm) {
if (comm != ~0ULL)
mixed = true;
comm = min(comm, part);
}
size = size + part;
}
}
ret = nvkm_ram_new_(&gp100_ram_func, fb, type, size, 0, &ram);
*pram = ram;
if (ret)
return ret;
nvkm_mm_fini(&ram->vram);
if (!(ram = *pram = kzalloc(sizeof(*ram), GFP_KERNEL)))
return -ENOMEM;
if (mixed) {
ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
((comm * fbpa_num) - rsvd_head) >>
NVKM_RAM_MM_SHIFT, 1);
if (ret)
return ret;
return gf100_ram_ctor(&gp100_ram, fb, ram);
ret = nvkm_mm_init(&ram->vram, (0x1000000000ULL + comm) >>
NVKM_RAM_MM_SHIFT,
(size - (comm * fbpa_num) - rsvd_tail) >>
NVKM_RAM_MM_SHIFT, 1);
if (ret)
return ret;
} else {
ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
(size - rsvd_head - rsvd_tail) >>
NVKM_RAM_MM_SHIFT, 1);
if (ret)
return ret;
}
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment