    drm/msm/gpu: Push gpu lock down past runpm · abe2023b
    Rob Clark authored
    Avoid holding the gpu lock while calling runpm, which prevents this lockdep splat (a sketch of the reordering follows the trace):
    
       ======================================================
       WARNING: possible circular locking dependency detected
       6.4.3-debug+ #14 Not tainted
       ------------------------------------------------------
       ring0/373 is trying to acquire lock:
       ffffffead86efb98 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock+0x70/0x98
    
       but task is already holding lock:
       ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm]
    
       which lock already depends on the new lock.
    
       the existing dependency chain (in reverse order) is:
    
       -> #4 (&gpu->lock){+.+.}-{3:3}:
              __mutex_lock+0xc8/0x388
              mutex_lock_nested+0x2c/0x38
              msm_job_run+0x7c/0x128 [msm]
              drm_sched_main+0x264/0x354 [gpu_sched]
              kthread+0xf0/0x100
              ret_from_fork+0x10/0x20
    
       -> #3 (dma_fence_map){++++}-{0:0}:
              __dma_fence_might_wait+0x74/0xc0
              dma_resv_lockdep+0x1f0/0x2e8
              do_one_initcall+0xb4/0x214
              kernel_init_freeable+0x338/0x33c
              kernel_init+0x30/0x134
              ret_from_fork+0x10/0x20
    
       -> #2 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}:
              fs_reclaim_acquire+0x7c/0x9c
              slab_pre_alloc_hook.constprop.0+0x40/0x250
              __kmem_cache_alloc_node+0x60/0x18c
              kmalloc_node_trace+0x40/0x84
              alloc_worker+0x2c/0x64
              init_rescuer+0x34/0xe0
              workqueue_init+0x168/0x1fc
              kernel_init_freeable+0x15c/0x33c
              kernel_init+0x30/0x134
              ret_from_fork+0x10/0x20
    
       -> #1 (fs_reclaim){+.+.}-{0:0}:
              __fs_reclaim_acquire+0x3c/0x48
              fs_reclaim_acquire+0x50/0x9c
              slab_pre_alloc_hook.constprop.0+0x40/0x250
              __kmem_cache_alloc_node+0x60/0x18c
              kmalloc_trace+0x44/0x88
              clk_rcg2_dfs_determine_rate+0x60/0x214
              clk_core_determine_round_nolock+0xb8/0xf0
              clk_core_round_rate_nolock+0x84/0x118
              clk_core_round_rate_nolock+0xd8/0x118
              clk_round_rate+0x6c/0xd0
              geni_se_clk_tbl_get+0x78/0xc0
              geni_se_clk_freq_match+0x44/0xe4
              get_spi_clk_cfg+0x50/0xf4
              geni_spi_set_clock_and_bw+0x54/0x104
              spi_geni_prepare_message+0x130/0x174
              __spi_pump_transfer_message+0x200/0x4d8
              __spi_sync+0x13c/0x23c
              spi_sync_locked+0x18/0x24
              do_cros_ec_pkt_xfer_spi+0x124/0x3f0
              cros_ec_xfer_high_pri_work+0x28/0x3c
              kthread_worker_fn+0x14c/0x27c
              kthread+0xf0/0x100
              ret_from_fork+0x10/0x20
    
       -> #0 (prepare_lock){+.+.}-{3:3}:
              __lock_acquire+0xdf8/0x109c
              lock_acquire+0x234/0x284
              __mutex_lock+0xc8/0x388
              mutex_lock_nested+0x2c/0x38
              clk_prepare_lock+0x70/0x98
              clk_prepare+0x24/0x50
              clk_bulk_prepare+0x50/0x9c
              a6xx_gmu_resume+0x94/0x800 [msm]
              a6xx_gmu_pm_resume+0x38/0x158 [msm]
              adreno_runtime_resume+0x2c/0x38 [msm]
              pm_generic_runtime_resume+0x30/0x44
              __rpm_callback+0x4c/0x134
              rpm_callback+0x78/0x7c
              rpm_resume+0x3a4/0x46c
              __pm_runtime_resume+0x78/0xbc
              pm_runtime_get_sync.isra.0+0x14/0x20 [msm]
              msm_gpu_submit+0x4c/0x12c [msm]
              msm_job_run+0x88/0x128 [msm]
              drm_sched_main+0x264/0x354 [gpu_sched]
              kthread+0xf0/0x100
              ret_from_fork+0x10/0x20
    
       other info that might help us debug this:

       Chain exists of:
         prepare_lock --> dma_fence_map --> &gpu->lock

        Possible unsafe locking scenario:

              CPU0                    CPU1
              ----                    ----
         lock(&gpu->lock);
                                      lock(dma_fence_map);
                                      lock(&gpu->lock);
         lock(prepare_lock);

        *** DEADLOCK ***

       2 locks held by ring0/373:
        #0: ffffffead875ae50 (dma_fence_map){++++}-{0:0}, at: drm_sched_main+0x54/0x354 [gpu_sched]
        #1: ffffff809cd19170 (&gpu->lock){+.+.}-{3:3}, at: msm_job_run+0x7c/0x128 [msm]
    
       stack backtrace:
       CPU: 2 PID: 373 Comm: ring0 Not tainted 6.4.3-debug+ #14
       Hardware name: Google Villager (rev1+) with LTE (DT)
       Call trace:
        dump_backtrace+0xb4/0xf0
        show_stack+0x20/0x30
        dump_stack_lvl+0x60/0x84
        dump_stack+0x18/0x24
        print_circular_bug+0x1cc/0x234
        check_noncircular+0x78/0xac
        __lock_acquire+0xdf8/0x109c
        lock_acquire+0x234/0x284
        __mutex_lock+0xc8/0x388
        mutex_lock_nested+0x2c/0x38
        clk_prepare_lock+0x70/0x98
        clk_prepare+0x24/0x50
        clk_bulk_prepare+0x50/0x9c
        a6xx_gmu_resume+0x94/0x800 [msm]
        a6xx_gmu_pm_resume+0x38/0x158 [msm]
        adreno_runtime_resume+0x2c/0x38 [msm]
        pm_generic_runtime_resume+0x30/0x44
        __rpm_callback+0x4c/0x134
        rpm_callback+0x78/0x7c
        rpm_resume+0x3a4/0x46c
        __pm_runtime_resume+0x78/0xbc
        pm_runtime_get_sync.isra.0+0x14/0x20 [msm]
        msm_gpu_submit+0x4c/0x12c [msm]
        msm_job_run+0x88/0x128 [msm]
        drm_sched_main+0x264/0x354 [gpu_sched]
        kthread+0xf0/0x100
        ret_from_fork+0x10/0x20
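
    The fix reorders things so that the runpm resume (which reaches clk_bulk_prepare() and takes the clk framework's prepare_lock, see chain #0 above) happens before gpu->lock is taken, rather than while it is held. Below is a minimal sketch of that ordering, not the actual msm_gpu.c diff: struct msm_gpu, gpu->lock, gpu->pdev, msm_job_run() and msm_gpu_submit() exist in the driver, but the helper and its simplified body are illustrative assumptions.

       /*
        * Illustrative sketch only -- simplified, not the upstream diff.
        *
        * Old ordering (what the splat shows):
        *     mutex_lock(&gpu->lock);                  // msm_job_run()
        *     pm_runtime_get_sync(&gpu->pdev->dev);    // msm_gpu_submit() -> clk prepare_lock
        *     ...
        *     mutex_unlock(&gpu->lock);
        *
        * New ordering: resume the device first, then take gpu->lock, so
        * prepare_lock is never acquired while gpu->lock is held.
        */
       #include <linux/mutex.h>
       #include <linux/pm_runtime.h>

       #include "msm_gpu.h"    /* provides struct msm_gpu (gpu->lock, gpu->pdev) */

       static void gpu_submit_sketch(struct msm_gpu *gpu)    /* hypothetical helper */
       {
               /* May resume the GMU and clk_bulk_prepare() the GPU clocks. */
               pm_runtime_get_sync(&gpu->pdev->dev);

               mutex_lock(&gpu->lock);
               /* ... write the submit into the ringbuffer ... */
               mutex_unlock(&gpu->lock);
       }

    With this ordering, the #0 step in the chain above (clk_prepare() reached from a6xx_gmu_resume() while gpu->lock is held) can no longer occur, so the prepare_lock -> dma_fence_map -> &gpu->lock cycle that lockdep reports is broken.
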
    Signed-off-by: Rob Clark <robdclark@chromium.org>
    Patchwork: https://patchwork.freedesktop.org/patch/552298/