Commit f046ca4a authored by Dave Airlie

Merge tag 'amd-drm-fixes-6.1-2022-10-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.1-2022-10-19:

amdgpu:
- Mode2 reset fixes for Sienna Cichlid
- Revert broken fan speed sensor fix
- SMU 13.x fixes
- GC 11.x fixes
- RAS fixes
- SR-IOV fixes
- Fix BO move breakage on SI
- Misc compiler fixes
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221019191357.6208-1-alexander.deucher@amd.com
parents 8865dd75 8273b404
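
For context on the SMU 13.x and reset entries in the changelog above: this series wires up DF C-state control so the driver can disallow deep DF C-states ahead of a GPU reset. The fragment below is a minimal illustrative sketch assembled from the hunks in this diff (the amdgpu_device_ip_suspend_phase1 change and the new smu_v13_0_0/smu_v13_0_7 set_df_cstate callbacks); it is not additional code from the patch set, and the callback name smu_v13_0_x_set_df_cstate is a placeholder, not a real symbol in the tree.

    /* Suspend path: disallow DF C-states before the IP blocks are
     * suspended (e.g. ahead of a Mode1 reset); a failure is only
     * warned about, it does not abort the reset.
     */
    if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
            dev_warn(adev->dev, "Failed to disallow df cstate");

    /* SMU 13.0.0 / 13.0.7 back this with a set_df_cstate ppt callback
     * that forwards the request to the PMFW via the generic message
     * helper (mapped to PPSMC_MSG_SetExternalClientDfCstateAllow).
     */
    static int smu_v13_0_x_set_df_cstate(struct smu_context *smu,
                                         enum pp_df_cstate state)
    {
            return smu_cmn_send_smc_msg_with_param(smu,
                                                   SMU_MSG_DFCstateControl,
                                                   state,
                                                   NULL);
    }

As the arcturus and aldebaran hunks below show, ASICs that do not need this prerequisite simply return early from set_df_cstate when amdgpu_in_reset(adev) or adev->in_suspend is set.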
......@@ -274,9 +274,6 @@ extern int amdgpu_vcnfw_log;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)
#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0)
#define AMDGPU_RESET_LEVEL_MODE2 (1 << 1)
/* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
......@@ -1065,7 +1062,6 @@ struct amdgpu_device {
struct work_struct reset_work;
uint32_t amdgpu_reset_level_mask;
bool job_hang;
};
......
......@@ -134,7 +134,6 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
......
......@@ -111,7 +111,7 @@ static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
lock_srbm(adev, mec, pipe, 0, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
......
......@@ -1954,8 +1954,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
return PTR_ERR(ent);
}
debugfs_create_u32("amdgpu_reset_level", 0600, root, &adev->amdgpu_reset_level_mask);
/* Register debugfs entries for amdgpu_ttm */
amdgpu_ttm_debugfs_init(adev);
amdgpu_debugfs_pm_init(adev);
......
......@@ -2928,6 +2928,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
/*
* Per PMFW team's suggestion, driver needs to handle gfxoff
* and df cstate features disablement for gpu reset(e.g. Mode1Reset)
* scenario. Add the missing df cstate disablement here.
*/
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
......@@ -5210,7 +5218,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
reset_context->job = job;
reset_context->hive = hive;
/*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
......@@ -5337,11 +5344,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_ras_resume(adev);
} else {
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
if (r && r == -EAGAIN) {
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags);
adev->asic_reset_res = 0;
if (r && r == -EAGAIN)
goto retry;
}
if (!r && gpu_reset_for_dev_remove)
goto recover_end;
......@@ -5777,7 +5781,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
adev->no_hw_access = true;
r = amdgpu_device_pre_asic_reset(adev, &reset_context);
......
......@@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
if (r)
......
......@@ -1950,7 +1950,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}
......@@ -2268,6 +2267,25 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 2):
return true;
default:
return false;
}
}
if (adev->asic_type == CHIP_IP_DISCOVERY) {
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 10):
return true;
default:
return false;
}
}
return adev->asic_type == CHIP_VEGA10 ||
adev->asic_type == CHIP_VEGA20 ||
adev->asic_type == CHIP_ARCTURUS ||
......@@ -2311,11 +2329,6 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
!amdgpu_ras_asic_supported(adev))
return;
/* If driver run on sriov guest side, only enable ras for aldebaran */
if (amdgpu_sriov_vf(adev) &&
adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2))
return;
if (!adev->gmc.xgmi.connected_to_cpu) {
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
dev_info(adev->dev, "MEM ECC is active.\n");
......
......@@ -37,8 +37,6 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
{
int ret = 0;
adev->amdgpu_reset_level_mask = 0x1;
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2):
ret = aldebaran_reset_init(adev);
......@@ -76,12 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
{
struct amdgpu_reset_handler *reset_handler = NULL;
if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
return -ENOSYS;
if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
return -ENOSYS;
if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
reset_handler = adev->reset_cntl->get_reset_handler(
adev->reset_cntl, reset_context);
......@@ -98,12 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
int ret;
struct amdgpu_reset_handler *reset_handler = NULL;
if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2))
return -ENOSYS;
if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags))
return -ENOSYS;
if (adev->reset_cntl)
reset_handler = adev->reset_cntl->get_reset_handler(
adev->reset_cntl, reset_context);
......
......@@ -30,8 +30,7 @@ enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
AMDGPU_SKIP_MODE2_RESET = 2,
AMDGPU_RESET_FOR_DEVICE_REMOVE = 3,
AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
};
struct amdgpu_reset_context {
......
......@@ -405,9 +405,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
{
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
if (!(ring->adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY))
return false;
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
......
......@@ -439,6 +439,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
while (cursor.remaining) {
amdgpu_res_next(&cursor, cursor.size);
if (!cursor.remaining)
break;
/* ttm_resource_ioremap only supports contiguous memory */
if (end != cursor.start)
return false;
......
......@@ -726,6 +726,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
/* VF MMIO access (except mailbox range) from CPU
* will be blocked during sriov runtime
*/
adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
/* we have the ability to check now */
if (amdgpu_sriov_vf(adev)) {
switch (adev->asic_type) {
......
......@@ -31,6 +31,7 @@
#define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */
#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* the whole GPU is passed through to the VM */
#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */
/* flags for indirect register access path supported by rlcg for sriov */
#define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28)
......@@ -297,6 +298,9 @@ struct amdgpu_video_codec_info;
#define amdgpu_passthrough(adev) \
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
#define amdgpu_sriov_vf_mmio_access_protection(adev) \
((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)
static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
......
......@@ -2338,7 +2338,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
#ifdef CONFIG_X86_64
if (amdgpu_vm_update_mode == -1) {
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
/* For asic with VF MMIO access protection
* avoid using CPU for VM table updates
*/
if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!amdgpu_sriov_vf_mmio_access_protection(adev))
adev->vm_manager.vm_update_mode =
AMDGPU_VM_USE_CPU_FOR_COMPUTE;
else
......
......@@ -1571,7 +1571,7 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
/* Enable trap for each kfd vmid. */
data = RREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL));
data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
}
soc21_grbm_select(adev, 0, 0, 0, 0);
......@@ -5076,6 +5076,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
......
......@@ -186,6 +186,10 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned int i;
unsigned char hub_ip = 0;
hub_ip = (vmhub == AMDGPU_GFXHUB_0) ?
GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
/*
......@@ -199,8 +203,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acquire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng);
tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
......@@ -210,12 +214,12 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
hub->eng_distance * eng);
tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
hub->eng_distance * eng, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
......@@ -229,8 +233,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0, hub_ip);
/* Issue additional private vm invalidation to MMHUB */
if ((vmhub != AMDGPU_GFXHUB_0) &&
......
......@@ -1156,6 +1156,42 @@ static int mes_v11_0_sw_fini(void *handle)
return 0;
}
static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev)
{
uint32_t data;
int i;
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0);
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
}
data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 1);
WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
adev->mes.ring.sched.ready = false;
}
static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
uint32_t tmp;
......@@ -1207,6 +1243,9 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
{
if (adev->mes.ring.sched.ready)
mes_v11_0_kiq_dequeue_sched(adev);
mes_v11_0_enable(adev, false);
return 0;
}
......@@ -1262,9 +1301,6 @@ static int mes_v11_0_hw_init(void *handle)
static int mes_v11_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->mes.ring.sched.ready = false;
return 0;
}
......@@ -1296,7 +1332,8 @@ static int mes_v11_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (!amdgpu_in_reset(adev))
if (!amdgpu_in_reset(adev) &&
(adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)))
amdgpu_mes_self_test(adev);
return 0;
......
......@@ -290,7 +290,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
......
......@@ -317,7 +317,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
......
......@@ -529,7 +529,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
......
......@@ -31,12 +31,23 @@
#include "amdgpu_psp.h"
#include "amdgpu_xgmi.h"
static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
{
#if 0
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) &&
adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev))
return true;
#endif
return false;
}
static struct amdgpu_reset_handler *
sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
struct amdgpu_reset_handler *handler;
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
if (reset_context->method != AMD_RESET_METHOD_NONE) {
list_for_each_entry(handler, &reset_ctl->reset_handlers,
......@@ -44,15 +55,13 @@ sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
if (handler->reset_method == reset_context->method)
return handler;
}
} else {
list_for_each_entry(handler, &reset_ctl->reset_handlers,
}
if (sienna_cichlid_is_mode2_default(reset_ctl)) {
list_for_each_entry (handler, &reset_ctl->reset_handlers,
handler_list) {
if (handler->reset_method == AMD_RESET_METHOD_MODE2 &&
adev->pm.fw_version >= 0x3a5500 &&
!amdgpu_sriov_vf(adev)) {
reset_context->method = AMD_RESET_METHOD_MODE2;
if (handler->reset_method == AMD_RESET_METHOD_MODE2)
return handler;
}
}
}
......
......@@ -423,6 +423,7 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 0):
return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
return false;
default:
return true;
......@@ -636,7 +637,11 @@ static int soc21_common_early_init(void *handle)
break;
case IP_VERSION(11, 0, 3):
adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_REPEATER_FGCG |
AMD_CG_SUPPORT_GFX_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG;
......
......@@ -77,7 +77,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
......
......@@ -262,8 +262,9 @@ struct kfd2kgd_calls {
uint32_t queue_id);
int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd,
uint32_t reset_type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id);
enum kfd_preempt_type reset_type,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id);
bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd);
......
......@@ -3362,11 +3362,11 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
if (adev->pm.sysfs_initialized)
return 0;
INIT_LIST_HEAD(&adev->pm.pm_attr_list);
if (adev->pm.dpm_enabled == 0)
return 0;
INIT_LIST_HEAD(&adev->pm.pm_attr_list);
adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
DRIVER_NAME, adev,
hwmon_groups);
......
......@@ -67,21 +67,22 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr,
int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr,
uint32_t *speed)
{
struct amdgpu_device *adev = hwmgr->adev;
uint32_t duty100, duty;
uint64_t tmp64;
uint32_t current_rpm;
uint32_t percent = 0;
duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1),
CG_FDO_CTRL1, FMAX_DUTY100);
duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS),
CG_THERMAL_STATUS, FDO_PWM_DUTY);
if (hwmgr->thermal_controller.fanInfo.bNoFan)
return 0;
if (!duty100)
return -EINVAL;
if (vega10_get_current_rpm(hwmgr, &current_rpm))
return -1;
if (hwmgr->thermal_controller.
advanceFanControlParameters.usMaxFanRPM != 0)
percent = current_rpm * 255 /
hwmgr->thermal_controller.
advanceFanControlParameters.usMaxFanRPM;
tmp64 = (uint64_t)duty * 255;
do_div(tmp64, duty100);
*speed = MIN((uint32_t)tmp64, 255);
*speed = MIN(percent, 255);
return 0;
}
......
......@@ -1314,8 +1314,8 @@ static int smu_smc_hw_setup(struct smu_context *smu)
ret = smu_enable_thermal_alert(smu);
if (ret) {
dev_err(adev->dev, "Failed to enable thermal alert!\n");
return ret;
dev_err(adev->dev, "Failed to enable thermal alert!\n");
return ret;
}
ret = smu_notify_display_change(smu);
......
......@@ -27,7 +27,7 @@
// *** IMPORTANT ***
// SMU TEAM: Always increment the interface version if
// any structure is changed in this file
#define PMFW_DRIVER_IF_VERSION 5
#define PMFW_DRIVER_IF_VERSION 7
typedef struct {
int32_t value;
......@@ -163,8 +163,8 @@ typedef struct {
uint16_t DclkFrequency; //[MHz]
uint16_t MemclkFrequency; //[MHz]
uint16_t spare; //[centi]
uint16_t UvdActivity; //[centi]
uint16_t GfxActivity; //[centi]
uint16_t UvdActivity; //[centi]
uint16_t Voltage[2]; //[mV] indices: VDDCR_VDD, VDDCR_SOC
uint16_t Current[2]; //[mA] indices: VDDCR_VDD, VDDCR_SOC
......@@ -199,6 +199,19 @@ typedef struct {
uint16_t DeviceState;
uint16_t CurTemp; //[centi-Celsius]
uint16_t spare2;
uint16_t AverageGfxclkFrequency;
uint16_t AverageFclkFrequency;
uint16_t AverageGfxActivity;
uint16_t AverageSocclkFrequency;
uint16_t AverageVclkFrequency;
uint16_t AverageVcnActivity;
uint16_t AverageDRAMReads; //Filtered DF Bandwidth::DRAM Reads
uint16_t AverageDRAMWrites; //Filtered DF Bandwidth::DRAM Writes
uint16_t AverageSocketPower; //Filtered value of CurrentSocketPower
uint16_t AverageCorePower; //Filtered of [sum of CorePower[8]])
uint16_t AverageCoreC0Residency[8]; //Filtered of [average C0 residency % per core]
uint32_t MetricsCounter; //Counts the # of metrics table parameter reads per update to the metrics table, i.e. if the metrics table update happens every 1 second, this value could be up to 1000 if the smu collected metrics data every cycle, or as low as 0 if the smu was asleep the whole time. Reset to 0 after writing.
} SmuMetrics_t;
typedef struct {
......
......@@ -28,7 +28,7 @@
#define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x30
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
......
......@@ -2242,9 +2242,17 @@ static void arcturus_get_unique_id(struct smu_context *smu)
static int arcturus_set_df_cstate(struct smu_context *smu,
enum pp_df_cstate state)
{
struct amdgpu_device *adev = smu->adev;
uint32_t smu_version;
int ret;
/*
* Arcturus does not need the cstate disablement
* prerequisite for gpu reset.
*/
if (amdgpu_in_reset(adev) || adev->in_suspend)
return 0;
ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
if (ret) {
dev_err(smu->adev->dev, "Failed to get smu version!\n");
......
......@@ -1640,6 +1640,15 @@ static bool aldebaran_is_baco_supported(struct smu_context *smu)
static int aldebaran_set_df_cstate(struct smu_context *smu,
enum pp_df_cstate state)
{
struct amdgpu_device *adev = smu->adev;
/*
* Aldebaran does not need the cstate disablement
* prerequisite for gpu reset.
*/
if (amdgpu_in_reset(adev) || adev->in_suspend)
return 0;
return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state, NULL);
}
......
......@@ -211,7 +211,8 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
return 0;
if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7)) ||
(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)))
(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 0)) ||
(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)))
return 0;
/* override pptable_id from driver parameter */
......@@ -454,9 +455,6 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
dev_info(adev->dev, "override pptable id %d\n", pptable_id);
} else {
pptable_id = smu->smu_table.boot_values.pp_table_id;
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10))
pptable_id = 6666;
}
/* force using vbios pptable in sriov mode */
......
......@@ -119,6 +119,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0),
MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0),
MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
};
static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
......@@ -1753,6 +1754,15 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
return ret;
}
static int smu_v13_0_0_set_df_cstate(struct smu_context *smu,
enum pp_df_cstate state)
{
return smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_DFCstateControl,
state,
NULL);
}
static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
......@@ -1822,6 +1832,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported,
.mode1_reset = smu_v13_0_mode1_reset,
.set_mp1_state = smu_v13_0_0_set_mp1_state,
.set_df_cstate = smu_v13_0_0_set_df_cstate,
};
void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
......
......@@ -121,6 +121,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0),
MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0),
MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
};
static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
......@@ -1587,6 +1588,16 @@ static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
return true;
}
static int smu_v13_0_7_set_df_cstate(struct smu_context *smu,
enum pp_df_cstate state)
{
return smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_DFCstateControl,
state,
NULL);
}
static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
......@@ -1649,6 +1660,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
.mode1_reset = smu_v13_0_mode1_reset,
.set_mp1_state = smu_v13_0_7_set_mp1_state,
.set_df_cstate = smu_v13_0_7_set_df_cstate,
};
void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
......