Commit 9c346968 authored by Dave Airlie

Merge branch 'linux-5.7' of git://github.com/skeggsb/linux into drm-next

A couple of misc fixes/workarounds for some issues that are causing a
lot of pain for people.

Of most interest are the PCI power management and GR init WARs, which
affect a rather significant number of laptop systems that are in use
today.
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Ben Skeggs <skeggsb@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/CACAvsv5Ef5YKS9EPBH3YUubzvVr++_rzjgSqV_B5nC0L2kB6-Q@mail.gmail.com
parents 3208a24f 374b5580
......@@ -35,7 +35,8 @@
#include <subdev/bios/gpio.h>
#include <subdev/gpio.h>
#include <subdev/timer.h>
#include <nvif/timer.h>
int nv04_dac_output_offset(struct drm_encoder *encoder)
{
......
......@@ -26,6 +26,7 @@
#include "hw.h"
#include <subdev/bios/pll.h>
#include <nvif/timer.h>
#define CHIPSET_NFORCE 0x01a0
#define CHIPSET_NFORCE2 0x01f0
......
......@@ -23,6 +23,7 @@
#include <nvif/cl507c.h>
#include <nvif/event.h>
#include <nvif/timer.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_fourcc.h>
......
......@@ -23,6 +23,7 @@
#include "head.h"
#include <nvif/cl507d.h>
#include <nvif/timer.h>
#include "nouveau_bo.h"
......
......@@ -24,6 +24,8 @@
#include <nouveau_bo.h>
#include <nvif/timer.h>
void
corec37d_wndw_owner(struct nv50_core *core)
{
......
......@@ -24,21 +24,36 @@
#include "head.h"
#include <nvif/cl507a.h>
#include <nvif/timer.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_plane_helper.h>
/*
 * Poll the cursor's immediate channel until it reports enough room.
 *
 * Reads offset 0x0008 of wndw->wimm.base.user repeatedly inside an
 * nvif_msec() wait of 2 (milliseconds), and returns true from within the
 * wait as soon as the value read is >= 4.  If the wait finishes without
 * that happening, a WARN is raised and false is returned.
 *
 * NOTE(review): 0x0008 presumably reports the number of free entries on
 * the channel - confirm against hardware documentation.
 */
bool
curs507a_space(struct nv50_wndw *wndw)
{
nvif_msec(&nouveau_drm(wndw->plane.dev)->client.device, 2,
/* Returning here leaves the nvif_msec() loop and the function at once. */
if (nvif_rd32(&wndw->wimm.base.user, 0x0008) >= 4)
return true;
);
/* Only reached when the wait above gave up. */
WARN_ON(1);
return false;
}
/*
 * Submit a cursor update on the immediate channel.
 *
 * Writes 0x00000000 to offset 0x0080 of the channel's user area, but only
 * after curs507a_space() has confirmed there is room.  If the space check
 * times out the write is skipped; curs507a_space() has already WARNed.
 *
 * The previous unconditional write ahead of the check was removed: it
 * bypassed the space check and caused the register to be written twice.
 *
 * @wndw:      cursor window whose immediate channel is written.
 * @interlock: unused by this implementation.
 */
static void
curs507a_update(struct nv50_wndw *wndw, u32 *interlock)
{
	if (curs507a_space(wndw))
		nvif_wr32(&wndw->wimm.base.user, 0x0080, 0x00000000);
}
/*
 * Program the cursor position on the immediate channel.
 *
 * Packs asyw->point.y into the upper 16 bits and asyw->point.x into the
 * lower bits, and writes the result to offset 0x0084 once curs507a_space()
 * reports room.  If the space check times out the write is skipped;
 * curs507a_space() has already WARNed.
 *
 * The previous unconditional write ahead of the check was removed: it
 * bypassed the space check and duplicated the register write.
 *
 * @wndw: cursor window whose immediate channel is written.
 * @asyw: atomic state supplying the new position.
 */
static void
curs507a_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
{
	if (curs507a_space(wndw)) {
		nvif_wr32(&wndw->wimm.base.user, 0x0084, asyw->point.y << 16 |
							 asyw->point.x);
	}
}
const struct nv50_wimm_func
......
......@@ -25,14 +25,17 @@
/*
 * Submit a cursor update on the immediate channel.
 *
 * Writes 0x00000001 to offset 0x0200 of the channel's user area, but only
 * after curs507a_space() has confirmed there is room.  If the space check
 * times out the write is skipped; curs507a_space() has already WARNed.
 *
 * The previous unconditional write ahead of the check was removed: it
 * bypassed the space check and caused the register to be written twice.
 *
 * @wndw:      cursor window whose immediate channel is written.
 * @interlock: unused by this implementation.
 */
static void
cursc37a_update(struct nv50_wndw *wndw, u32 *interlock)
{
	if (curs507a_space(wndw))
		nvif_wr32(&wndw->wimm.base.user, 0x0200, 0x00000001);
}
/*
 * Program the cursor position on the immediate channel.
 *
 * Packs asyw->point.y into the upper 16 bits and asyw->point.x into the
 * lower bits, and writes the result to offset 0x0208 once curs507a_space()
 * reports room.  If the space check times out the write is skipped;
 * curs507a_space() has already WARNed.
 *
 * The previous unconditional write ahead of the check was removed: it
 * bypassed the space check and duplicated the register write.
 *
 * @wndw: cursor window whose immediate channel is written.
 * @asyw: atomic state supplying the new position.
 */
static void
cursc37a_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
{
	if (curs507a_space(wndw)) {
		nvif_wr32(&wndw->wimm.base.user, 0x0208, asyw->point.y << 16 |
							 asyw->point.x);
	}
}
static const struct nv50_wimm_func
......
......@@ -45,6 +45,7 @@
#include <nvif/cl5070.h>
#include <nvif/cl507d.h>
#include <nvif/event.h>
#include <nvif/timer.h>
#include "nouveau_drv.h"
#include "nouveau_dma.h"
......
......@@ -24,6 +24,8 @@
#include <nouveau_bo.h>
#include <nvif/timer.h>
static void
ovly827e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
{
......
......@@ -97,6 +97,7 @@ struct nv50_wimm_func {
};
extern const struct nv50_wimm_func curs507a;
bool curs507a_space(struct nv50_wndw *);
int wndwc37e_new(struct nouveau_drm *, enum drm_plane_type, int, s32,
struct nv50_wndw **);
......
......@@ -23,27 +23,6 @@ int nvif_device_init(struct nvif_object *, u32 handle, s32 oclass, void *, u32,
void nvif_device_fini(struct nvif_device *);
u64 nvif_device_time(struct nvif_device *);
/* Delay based on GPU time (ie. PTIMER).
 *
 * Runs 'cond' at least once, then again for as long as the time elapsed
 * since entry (difference of two nvif_device_time() readings) is below
 * 'n' nanoseconds.
 *
 * Evaluates to -ETIMEDOUT once the elapsed time reaches 'n'.  If 'cond'
 * leaves the loop with 'break', it evaluates instead to the elapsed time
 * sampled at the end of the previous pass (0 when the break happens on
 * the first pass).
 *
 * NOTE: the loop is bounded only by the reported time; nothing here limits
 * the number of passes if nvif_device_time() never advances.
 *
 * nvif_usec()/nvif_msec() are the same wait with the limit given in
 * microseconds/milliseconds (scaled by 1000 at each step).
 */
#define nvif_nsec(d,n,cond...) ({ \
struct nvif_device *_device = (d); \
u64 _nsecs = (n), _time0 = nvif_device_time(_device); \
s64 _taken = 0; \
\
do { \
cond \
} while (_taken = nvif_device_time(_device) - _time0, _taken < _nsecs);\
\
if (_taken >= _nsecs) \
_taken = -ETIMEDOUT; \
_taken; \
})
#define nvif_usec(d,u,cond...) nvif_nsec((d), (u) * 1000, ##cond)
#define nvif_msec(d,m,cond...) nvif_usec((d), (m) * 1000, ##cond)
/*XXX*/
#include <subdev/bios.h>
#include <subdev/fb.h>
......
#ifndef __NVIF_TIMER_H__
#define __NVIF_TIMER_H__
#include <nvif/os.h>
/* State for one bounded wait, set up by nvif_timer_wait_init() and
 * advanced by each call to nvif_timer_wait_test().
 */
struct nvif_timer_wait {
/* Device whose time is sampled with nvif_device_time(). */
struct nvif_device *device;
/* Maximum elapsed time allowed, in the units passed as 'nsec' at init. */
u64 limit;
/* Time captured by the first test; the reference for elapsed time. */
u64 time0;
/* Most recent time value observed by a test. */
u64 time1;
/* 0 until the first test; afterwards counts consecutive tests that
 * observed the same time value (reset to 1 whenever the time changes).
 */
int reads;
};
void nvif_timer_wait_init(struct nvif_device *, u64 nsec,
struct nvif_timer_wait *);
s64 nvif_timer_wait_test(struct nvif_timer_wait *);
/* Delay based on GPU time (ie. PTIMER).
 *
 * Runs 'cond' at least once, then again for as long as
 * nvif_timer_wait_test() keeps returning a non-negative value for a wait
 * initialised with a limit of 'n' nanoseconds.
 *
 * Evaluates to the negative value returned by nvif_timer_wait_test()
 * (-ETIMEDOUT) when the wait gives up.  If 'cond' leaves the loop with
 * 'break', it evaluates instead to the elapsed time reported by the most
 * recent test (0 when the break happens on the first pass).
 *
 * nvif_usec()/nvif_msec() are the same wait with the limit given in
 * microseconds/milliseconds (scaled by 1000 at each step).
 */
#define nvif_nsec(d,n,cond...) ({ \
struct nvif_timer_wait _wait; \
s64 _taken = 0; \
\
nvif_timer_wait_init((d), (n), &_wait); \
do { \
cond \
} while ((_taken = nvif_timer_wait_test(&_wait)) >= 0); \
\
_taken; \
})
#define nvif_usec(d,u,cond...) nvif_nsec((d), (u) * 1000, ##cond)
#define nvif_msec(d,m,cond...) nvif_usec((d), (m) * 1000, ##cond)
#endif
......@@ -10,6 +10,7 @@ struct nvif_user {
/* Per-class operations for an nvif_user object. */
struct nvif_user_func {
/* Doorbell hook; receives the user object and a 32-bit token. */
void (*doorbell)(struct nvif_user *, u32 token);
/* Returns a 64-bit time value read through the user object. */
u64 (*time)(struct nvif_user *);
};
int nvif_user_init(struct nvif_device *);
......
......@@ -1494,8 +1494,13 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
ret = nvif_object_map_handle(&mem->mem.object,
&args, argc,
&handle, &length);
if (ret != 1)
return ret ? ret : -EINVAL;
if (ret != 1) {
if (WARN_ON(ret == 0))
return -EINVAL;
if (ret == -ENOSPC)
return -EAGAIN;
return ret;
}
reg->bus.base = 0;
reg->bus.offset = handle;
......
......@@ -222,22 +222,18 @@ nouveau_drm_debugfs_init(struct drm_minor *minor)
{
struct nouveau_drm *drm = nouveau_drm(minor->dev);
struct dentry *dentry;
int i, ret;
int i;
for (i = 0; i < ARRAY_SIZE(nouveau_debugfs_files); i++) {
dentry = debugfs_create_file(nouveau_debugfs_files[i].name,
S_IRUGO | S_IWUSR,
minor->debugfs_root, minor->dev,
nouveau_debugfs_files[i].fops);
if (!dentry)
return -ENOMEM;
debugfs_create_file(nouveau_debugfs_files[i].name,
S_IRUGO | S_IWUSR,
minor->debugfs_root, minor->dev,
nouveau_debugfs_files[i].fops);
}
ret = drm_debugfs_create_files(nouveau_debugfs_list,
NOUVEAU_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
if (ret)
return ret;
drm_debugfs_create_files(nouveau_debugfs_list,
NOUVEAU_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
/* Set the size of the vbios since we know it, and it's confusing to
* userspace if it wants to seek() but the file has a length of 0
......
......@@ -618,6 +618,64 @@ nouveau_drm_device_fini(struct drm_device *dev)
kfree(drm);
}
/*
* On some Intel PCIe bridge controllers doing a
* D0 -> D3hot -> D3cold -> D0 sequence causes Nvidia GPUs to not reappear.
* Skipping the intermediate D3hot step seems to make it work again. This is
* probably caused by not meeting the expectation the involved AML code has
* when the GPU is put into D3hot state before invoking it.
*
* This leads to various manifestations of this issue:
* - AML code execution to power on the GPU hits an infinite loop (as the
* code waits on device memory to change).
* - kernel crashes, as all PCI reads return -1, which most code isn't able
* to handle well enough.
*
* In all cases dmesg will contain at least one line like this:
* 'nouveau 0000:01:00.0: Refused to change power state, currently in D3'
* followed by a lot of nouveau timeouts.
*
* Deeper down, the \_SB.PCI0.PEG0.PG00._OFF code writes bit 0x80 to the
* undocumented PCI config space register 0x248 of the Intel PCIe bridge
* controller (0x1901) in order to change the state of the PCIe link between
* the PCIe port and the GPU. There are alternative code paths using other
* registers, which seem to work fine (executed pre Windows 8):
* - 0xbc bit 0x20 (publicly available documentation claims 'reserved')
* - 0xb0 bit 0x10 (link disable)
* Changing the conditions inside the firmware by poking into the relevant
* addresses does resolve the issue, but it seemed to be ACPI private memory
* and not any device accessible memory at all, so there is no portable way of
* changing the conditions.
* On a XPS 9560 that means bits [0,3] on \CPEX need to be cleared.
*
* The only systems where this behavior can be seen are hybrid graphics laptops
* with a secondary Nvidia Maxwell, Pascal or Turing GPU. It's unclear whether
* this issue only occurs in combination with listed Intel PCIe bridge
* controllers and the mentioned GPUs or other devices as well.
*
* documentation on the PCIe bridge controller can be found in the
* "7th Generation Intel® Processor Families for H Platforms Datasheet Volume 2"
* Section "12 PCI Express* Controller (x16) Registers"
*/
/*
 * Hide the GPU's PCI power-management capability when it sits directly
 * below an Intel bridge with device ID 0x1901.
 *
 * The original pm_cap value is saved in drm->old_pm_cap before being
 * cleared.  Devices behind any other bridge (or with no upstream bridge)
 * are left untouched.
 */
static void quirk_broken_nv_runpm(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct nouveau_drm *drm = nouveau_drm(dev);
	struct pci_dev *bridge = pci_upstream_bridge(pdev);

	if (!bridge)
		return;
	if (bridge->vendor != PCI_VENDOR_ID_INTEL)
		return;
	if (bridge->device != 0x1901)
		return;

	/* Remember the original value before zeroing it. */
	drm->old_pm_cap = pdev->pm_cap;
	pdev->pm_cap = 0;
	NV_INFO(drm, "Disabling PCI power management to avoid bug\n");
}
static int nouveau_drm_probe(struct pci_dev *pdev,
const struct pci_device_id *pent)
{
......@@ -699,6 +757,7 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
if (ret)
goto fail_drm_dev_init;
quirk_broken_nv_runpm(pdev);
return 0;
fail_drm_dev_init:
......@@ -734,7 +793,11 @@ static void
nouveau_drm_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct nouveau_drm *drm = nouveau_drm(dev);
/* revert our workaround */
if (drm->old_pm_cap)
pdev->pm_cap = drm->old_pm_cap;
nouveau_drm_device_remove(dev);
pci_disable_device(pdev);
}
......
......@@ -140,6 +140,8 @@ struct nouveau_drm {
struct list_head clients;
u8 old_pm_cap;
struct {
struct agp_bridge_data *bridge;
u32 base;
......
......@@ -171,6 +171,11 @@ nouveau_svmm_bind(struct drm_device *dev, void *data,
mm = get_task_mm(current);
down_read(&mm->mmap_sem);
if (!cli->svm.svmm) {
up_read(&mm->mmap_sem);
return -EINVAL;
}
for (addr = args->va_start, end = args->va_start + size; addr < end;) {
struct vm_area_struct *vma;
unsigned long next;
......@@ -179,6 +184,7 @@ nouveau_svmm_bind(struct drm_device *dev, void *data,
if (!vma)
break;
addr = max(addr, vma->vm_start);
next = min(vma->vm_end, end);
/* This is a best effort so we ignore errors */
nouveau_dmem_migrate_vma(cli->drm, vma, addr, next);
......@@ -657,9 +663,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
limit = start + (ARRAY_SIZE(args.phys) << PAGE_SHIFT);
if (start < svmm->unmanaged.limit)
limit = min_t(u64, limit, svmm->unmanaged.start);
else
if (limit > svmm->unmanaged.start)
start = max_t(u64, start, svmm->unmanaged.limit);
SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
mm = svmm->notifier.mm;
......
......@@ -8,6 +8,7 @@ nvif-y += nvif/fifo.o
nvif-y += nvif/mem.o
nvif-y += nvif/mmu.o
nvif-y += nvif/notify.o
nvif-y += nvif/timer.o
nvif-y += nvif/vmm.o
# Usermode classes
......
......@@ -27,11 +27,15 @@
/*
 * Read the current device time.
 *
 * When the device has usermode functions available (device->user.func is
 * set), the time is read through its ->time() hook.  Otherwise it falls
 * back to the NV_DEVICE_V0_TIME object method, WARNing once if that call
 * fails, and returns whatever is in args.time (zero-initialised).
 *
 * The earlier unconditional method call has been dropped: it returned
 * before the usermode path could ever be reached.
 */
u64
nvif_device_time(struct nvif_device *device)
{
	if (!device->user.func) {
		struct nv_device_time_v0 args = {};
		int ret = nvif_object_mthd(&device->object, NV_DEVICE_V0_TIME,
					   &args, sizeof(args));
		WARN_ON_ONCE(ret != 0);
		return args.time;
	}

	return device->user.func->time(&device->user);
}
void
......
/*
* Copyright 2020 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <nvif/timer.h>
#include <nvif/device.h>
/*
 * Sample the device time and report how long this wait has been running.
 *
 * Returns the elapsed time since the first test of this wait, or
 * -ETIMEDOUT when either the elapsed time exceeds wait->limit or the
 * device time has stopped changing across a run of consecutive tests
 * (which also raises a WARN).
 */
s64
nvif_timer_wait_test(struct nvif_timer_wait *wait)
{
	const u64 now = nvif_device_time(wait->device);
	u64 elapsed;

	/* First test after init: this reading becomes the reference point. */
	if (!wait->reads)
		wait->time0 = wait->time1 = now;

	if (now != wait->time1) {
		/* Time moved on: record it and restart the stall counter. */
		wait->time1 = now;
		wait->reads = 1;
	} else if (WARN_ON(wait->reads++ == 16)) {
		/* Same reading too many times in a row: give up. */
		return -ETIMEDOUT;
	}

	elapsed = wait->time1 - wait->time0;
	if (elapsed > wait->limit)
		return -ETIMEDOUT;

	return elapsed;
}
/*
 * Prepare @wait for a new bounded wait on @device lasting at most @nsec.
 *
 * time0/time1 are not set here; nvif_timer_wait_test() captures them on
 * its first call, which it recognises by reads == 0.
 */
void
nvif_timer_wait_init(struct nvif_device *device, u64 nsec,
		     struct nvif_timer_wait *wait)
{
	wait->reads = 0;
	wait->limit = nsec;
	wait->device = device;
}
......@@ -21,6 +21,19 @@
*/
#include <nvif/user.h>
static u64
nvif_userc361_time(struct nvif_user *user)
{
u32 hi, lo;
do {
hi = nvif_rd32(&user->object, 0x084);
lo = nvif_rd32(&user->object, 0x080);
} while (hi != nvif_rd32(&user->object, 0x084));
return ((u64)hi << 32 | lo);
}
static void
nvif_userc361_doorbell(struct nvif_user *user, u32 token)
{
......@@ -30,4 +43,5 @@ nvif_userc361_doorbell(struct nvif_user *user, u32 token)
/* Operations exported as nvif_userc361: doorbell ring and time readback. */
const struct nvif_user_func
nvif_userc361 = {
.doorbell = nvif_userc361_doorbell,
.time = nvif_userc361_time,
};
......@@ -1981,8 +1981,34 @@ gf100_gr_init_(struct nvkm_gr *base)
{
struct gf100_gr *gr = gf100_gr(base);
struct nvkm_subdev *subdev = &base->engine.subdev;
struct nvkm_device *device = subdev->device;
bool reset = device->chipset == 0x137 || device->chipset == 0x138;
u32 ret;
/* On certain GP107/GP108 boards, we trigger a weird issue where
* GR will stop responding to PRI accesses after we've asked the
* SEC2 RTOS to boot the GR falcons. This happens with far more
* frequency when cold-booting a board (ie. returning from D3).
*
* The root cause for this is not known and has proven difficult
* to isolate, with many avenues being dead-ends.
*
* A workaround was discovered by Karol, whereby putting GR into
* reset for an extended period right before initialisation
* prevents the problem from occurring.
*
* XXX: As RM does not require any such workaround, this is more
* of a hack than a true fix.
*/
reset = nvkm_boolopt(device->cfgopt, "NvGrResetWar", reset);
if (reset) {
nvkm_mask(device, 0x000200, 0x00001000, 0x00000000);
nvkm_rd32(device, 0x000200);
msleep(50);
nvkm_mask(device, 0x000200, 0x00001000, 0x00001000);
nvkm_rd32(device, 0x000200);
}
nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
ret = nvkm_falcon_get(&gr->fecs.falcon, subdev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment