Commit 6edc84bc authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'drm-fixes-2023-09-29' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
 "Regular pull, this feel suspiciously light so I expect next week might
  be a bit heavier? Let's see how we go. This is from a code point of
  view ivpu and i915 fixes.

  The only other patch is adding Danilo Krummrich to the nouveau
  maintainers, he's agreed to take on more of the roll after Ben
  retired.

  MAINTAINERS:
   - add Danilo for nouveau

  ivpu:
   - Add PCI ids for Arrow Lake
   - Fix memory corruption during IPC
   - Avoid dmesg flooding
   - 40xx: Wait for clock resource
   - 40xx: Fix interrupt usage
   - 40xx: Support caching when loading firmware

  i915:
   - Fix a panic regression on gen8_ggtt_insert_entries
   - Fix load issue due to reservation address in ggtt_reserve_guc_top
   - Fix a possible deadlock with guc busyness worker"

* tag 'drm-fixes-2023-09-29' of git://anongit.freedesktop.org/drm/drm:
  accel/ivpu: Use cached buffers for FW loading
  accel/ivpu/40xx: Fix missing VPUIP interrupts
  accel/ivpu/40xx: Disable frequency change interrupt
  accel/ivpu/40xx: Ensure clock resource ownership Ack before Power-Up
  accel/ivpu: Don't flood dmesg with VPU ready message
  accel/ivpu: Do not use wait event interruptible
  MAINTAINERS: update nouveau maintainers
  i915/guc: Get runtime pm in busyness worker only if already active
  drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top
  i915: Limit the length of an sg list to the requested length
  accel/ivpu: Add Arrow Lake pci id
parents 71e58659 06365a04
......@@ -6647,6 +6647,7 @@ F: drivers/gpu/drm/panel/panel-novatek-nt36672a.c
DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
M: Karol Herbst <kherbst@redhat.com>
M: Lyude Paul <lyude@redhat.com>
M: Danilo Krummrich <dakr@redhat.com>
L: dri-devel@lists.freedesktop.org
L: nouveau@lists.freedesktop.org
S: Supported
......
......@@ -327,7 +327,7 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
}
if (!ret)
ivpu_info(vdev, "VPU ready message received successfully\n");
ivpu_dbg(vdev, PM, "VPU ready message received successfully\n");
else
ivpu_hw_diagnose_failure(vdev);
......@@ -634,6 +634,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
static struct pci_device_id ivpu_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_ARL) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_LNL) },
{ }
};
......
......@@ -23,6 +23,7 @@
#define DRIVER_DATE "20230117"
#define PCI_DEVICE_ID_MTL 0x7d1d
#define PCI_DEVICE_ID_ARL 0xad1d
#define PCI_DEVICE_ID_LNL 0x643e
#define IVPU_HW_37XX 37
......@@ -165,6 +166,7 @@ static inline int ivpu_hw_gen(struct ivpu_device *vdev)
{
switch (ivpu_device_id(vdev)) {
case PCI_DEVICE_ID_MTL:
case PCI_DEVICE_ID_ARL:
return IVPU_HW_37XX;
case PCI_DEVICE_ID_LNL:
return IVPU_HW_40XX;
......
......@@ -220,7 +220,8 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
if (ret)
return ret;
fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC);
fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size,
DRM_IVPU_BO_CACHED | DRM_IVPU_BO_NOSNOOP);
if (!fw->mem) {
ivpu_err(vdev, "Failed to allocate firmware runtime memory\n");
return -ENOMEM;
......@@ -330,7 +331,7 @@ int ivpu_fw_load(struct ivpu_device *vdev)
memset(start, 0, size);
}
wmb(); /* Flush WC buffers after writing fw->mem */
clflush_cache_range(fw->mem->kvaddr, fw->mem->base.size);
return 0;
}
......@@ -432,6 +433,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
if (!ivpu_fw_is_cold_boot(vdev)) {
boot_params->save_restore_ret_address = 0;
vdev->pm->is_warmboot = true;
clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K);
return;
}
......@@ -493,7 +495,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
wmb(); /* Flush WC buffers after writing bootparams */
clflush_cache_range(vdev->fw->mem->kvaddr, SZ_4K);
ivpu_fw_boot_params_print(vdev, boot_params);
}
......@@ -8,6 +8,8 @@
#include <drm/drm_gem.h>
#include <drm/drm_mm.h>
#define DRM_IVPU_BO_NOSNOOP 0x10000000
struct dma_buf;
struct ivpu_bo_ops;
struct ivpu_file_priv;
......@@ -83,6 +85,9 @@ static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
{
if (bo->flags & DRM_IVPU_BO_NOSNOOP)
return false;
return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
}
......
......@@ -57,8 +57,7 @@
#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK)
#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \
(REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \
(REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI0_ERR)) | \
(REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, CFI1_ERR)) | \
(REG_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, IMR0_ERR)) | \
......@@ -196,6 +195,14 @@ static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev)
return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US);
}
static int ivpu_wait_for_clock_own_resource_ack(struct ivpu_device *vdev)
{
if (ivpu_is_simics(vdev))
return 0;
return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, CLOCK_RESOURCE_OWN_ACK, 1, TIMEOUT_US);
}
static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev)
{
struct ivpu_hw_info *hw = vdev->hw;
......@@ -556,6 +563,12 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
{
int ret;
ret = ivpu_wait_for_clock_own_resource_ack(vdev);
if (ret) {
ivpu_err(vdev, "Timed out waiting for clock own resource ACK\n");
return ret;
}
ivpu_boot_pwr_island_trickle_drive(vdev, true);
ivpu_boot_pwr_island_drive(vdev, true);
......@@ -1046,9 +1059,6 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
if (status == 0)
return IRQ_NONE;
/* Disable global interrupt before handling local buttress interrupts */
REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
......@@ -1096,9 +1106,6 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
/* This must be done after interrupts are cleared at the source. */
REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
/* Re-enable global interrupt */
REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
if (schedule_recovery)
ivpu_pm_schedule_recovery(vdev);
......@@ -1110,9 +1117,14 @@ static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
struct ivpu_device *vdev = ptr;
irqreturn_t ret = IRQ_NONE;
REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
ret |= ivpu_hw_40xx_irqv_handler(vdev, irq);
ret |= ivpu_hw_40xx_irqb_handler(vdev, irq);
/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
if (ret & IRQ_WAKE_THREAD)
return IRQ_WAKE_THREAD;
......
......@@ -70,6 +70,8 @@
#define VPU_40XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0)
#define VPU_40XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1)
#define VPU_40XX_BUTTRESS_VPU_STATUS_DUP_IDLE_MASK BIT_MASK(2)
#define VPU_40XX_BUTTRESS_VPU_STATUS_CLOCK_RESOURCE_OWN_ACK_MASK BIT_MASK(6)
#define VPU_40XX_BUTTRESS_VPU_STATUS_POWER_RESOURCE_OWN_ACK_MASK BIT_MASK(7)
#define VPU_40XX_BUTTRESS_VPU_STATUS_PERF_CLK_MASK BIT_MASK(11)
#define VPU_40XX_BUTTRESS_VPU_STATUS_DISABLE_CLK_RELINQUISH_MASK BIT_MASK(12)
......
......@@ -209,10 +209,10 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_rx_msg *rx_msg;
int wait_ret, ret = 0;
wait_ret = wait_event_interruptible_timeout(cons->rx_msg_wq,
(IS_KTHREAD() && kthread_should_stop()) ||
!list_empty(&cons->rx_msg_list),
msecs_to_jiffies(timeout_ms));
wait_ret = wait_event_timeout(cons->rx_msg_wq,
(IS_KTHREAD() && kthread_should_stop()) ||
!list_empty(&cons->rx_msg_list),
msecs_to_jiffies(timeout_ms));
if (IS_KTHREAD() && kthread_should_stop())
return -EINTR;
......@@ -220,9 +220,6 @@ int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
if (wait_ret == 0)
return -ETIMEDOUT;
if (wait_ret < 0)
return -ERESTARTSYS;
spin_lock_irq(&cons->rx_msg_lock);
rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
if (!rx_msg) {
......
......@@ -100,6 +100,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
st->nents = 0;
for (i = 0; i < page_count; i++) {
struct folio *folio;
unsigned long nr_pages;
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
0,
......@@ -150,6 +151,8 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
}
} while (1);
nr_pages = min_t(unsigned long,
folio_nr_pages(folio), page_count - i);
if (!i ||
sg->length >= max_segment ||
folio_pfn(folio) != next_pfn) {
......@@ -157,13 +160,13 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
sg = sg_next(sg);
st->nents++;
sg_set_folio(sg, folio, folio_size(folio), 0);
sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
} else {
/* XXX: could overflow? */
sg->length += folio_size(folio);
sg->length += nr_pages * PAGE_SIZE;
}
next_pfn = folio_pfn(folio) + folio_nr_pages(folio);
i += folio_nr_pages(folio) - 1;
next_pfn = folio_pfn(folio) + nr_pages;
i += nr_pages - 1;
/* Check that the i965g/gm workaround works. */
GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
......
......@@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}
/*
* Reserve the top of the GuC address space for firmware images. Addresses
* beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
* which makes for a suitable range to hold GuC/HuC firmware images if the
* size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
* is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
* of the same size anyway, which is far more than needed, to keep the logic
* in uc_fw_ggtt_offset() simple.
*/
#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
u64 size;
u64 offset;
int ret;
if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
return 0;
GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
size = ggtt->vm.total - GUC_GGTT_TOP;
GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
PIN_NOEVICT);
ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
GUC_TOP_RESERVE_SIZE, offset,
I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
if (ret)
drm_dbg(&ggtt->vm.i915->drm,
"Failed to reserve top of GGTT for GuC\n");
......
......@@ -1432,6 +1432,36 @@ static void guc_timestamp_ping(struct work_struct *wrk)
unsigned long index;
int srcu, ret;
/*
* Ideally the busyness worker should take a gt pm wakeref because the
* worker only needs to be active while gt is awake. However, the
* gt_park path cancels the worker synchronously and this complicates
* the flow if the worker is also running at the same time. The cancel
* waits for the worker and when the worker releases the wakeref, that
* would call gt_park and would lead to a deadlock.
*
* The resolution is to take the global pm wakeref if runtime pm is
* already active. If not, we don't need to update the busyness stats as
* the stats would already be updated when the gt was parked.
*
* Note:
* - We do not requeue the worker if we cannot take a reference to runtime
* pm since intel_guc_busyness_unpark would requeue the worker in the
* resume path.
*
* - If the gt was parked longer than time taken for GT timestamp to roll
* over, we ignore those rollovers since we don't care about tracking
* the exact GT time. We only care about roll overs when the gt is
* active and running workloads.
*
* - There is a window of time between gt_park and runtime suspend,
* where the worker may run. This is acceptable since the worker will
* not find any new data to update busyness.
*/
wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
if (!wakeref)
return;
/*
* Synchronize with gt reset to make sure the worker does not
* corrupt the engine/guc stats. NB: can't actually block waiting
......@@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
*/
ret = intel_gt_reset_trylock(gt, &srcu);
if (ret)
return;
goto err_trylock;
with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
__update_guc_busyness_stats(guc);
__update_guc_busyness_stats(guc);
/* adjust context stats for overflow */
xa_for_each(&guc->context_lookup, index, ce)
......@@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
intel_gt_reset_unlock(gt, srcu);
guc_enable_busyness_worker(guc);
err_trylock:
intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}
static int guc_action_enable_usage_stats(struct intel_guc *guc)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment