Commit 34a1b0f9 authored by Rodrigo Siqueira's avatar Rodrigo Siqueira Committed by Alex Deucher

drm/amd/display: Move insert entry table to the FPU code

The insert_entry_into_table_sorted function uses FPU operation and calls
other static functions support. This commit moves the insert entry
function with all the required struct and static functions to the FPU
file.
Tested-by: default avatarDaniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: default avatarHarry Wentland <Harry.Wentland@amd.com>
Signed-off-by: default avatarRodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 44998fbd
......@@ -115,137 +115,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C
#define DC_LOGGER_INIT(logger)
#define DCN3_2_DEFAULT_DET_SIZE 256
#define DCN3_2_MAX_DET_SIZE 1152
#define DCN3_2_MIN_DET_SIZE 128
#define DCN3_2_MIN_COMPBUF_SIZE_KB 128
struct _vcs_dpi_ip_params_st dcn3_2_ip = {
.gpuvm_enable = 0,
.gpuvm_max_page_table_levels = 4,
.hostvm_enable = 0,
.rob_buffer_size_kbytes = 128,
.det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
.config_return_buffer_size_in_kbytes = 1280,
.compressed_buffer_segment_size_in_kbytes = 64,
.meta_fifo_size_in_kentries = 22,
.zero_size_buffer_entries = 512,
.compbuf_reserved_space_64b = 256,
.compbuf_reserved_space_zs = 64,
.dpp_output_buffer_pixels = 2560,
.opp_output_buffer_lines = 1,
.pixel_chunk_size_kbytes = 8,
.alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team
.min_pixel_chunk_size_bytes = 1024,
.dcc_meta_buffer_size_bytes = 6272,
.meta_chunk_size_kbytes = 2,
.min_meta_chunk_size_bytes = 256,
.writeback_chunk_size_kbytes = 8,
.ptoi_supported = false,
.num_dsc = 4,
.maximum_dsc_bits_per_component = 12,
.maximum_pixels_per_line_per_dsc_unit = 6016,
.dsc422_native_support = true,
.is_line_buffer_bpp_fixed = true,
.line_buffer_fixed_bpp = 57,
.line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp
.max_line_buffer_lines = 32,
.writeback_interface_buffer_size_kbytes = 90,
.max_num_dpp = 4,
.max_num_otg = 4,
.max_num_hdmi_frl_outputs = 1,
.max_num_wb = 1,
.max_dchub_pscl_bw_pix_per_clk = 4,
.max_pscl_lb_bw_pix_per_clk = 2,
.max_lb_vscl_bw_pix_per_clk = 4,
.max_vscl_hscl_bw_pix_per_clk = 4,
.max_hscl_ratio = 6,
.max_vscl_ratio = 6,
.max_hscl_taps = 8,
.max_vscl_taps = 8,
.dpte_buffer_size_in_pte_reqs_luma = 64,
.dpte_buffer_size_in_pte_reqs_chroma = 34,
.dispclk_ramp_margin_percent = 1,
.max_inter_dcn_tile_repeaters = 8,
.cursor_buffer_size = 16,
.cursor_chunk_size = 2,
.writeback_line_buffer_buffer_size = 0,
.writeback_min_hscl_ratio = 1,
.writeback_min_vscl_ratio = 1,
.writeback_max_hscl_ratio = 1,
.writeback_max_vscl_ratio = 1,
.writeback_max_hscl_taps = 1,
.writeback_max_vscl_taps = 1,
.dppclk_delay_subtotal = 47,
.dppclk_delay_scl = 50,
.dppclk_delay_scl_lb_only = 16,
.dppclk_delay_cnvc_formatter = 28,
.dppclk_delay_cnvc_cursor = 6,
.dispclk_delay_subtotal = 125,
.dynamic_metadata_vm_enabled = false,
.odm_combine_4to1_supported = false,
.dcc_supported = true,
.max_num_dp2p0_outputs = 2,
.max_num_dp2p0_streams = 4,
};
struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
.clock_limits = {
{
.state = 0,
.dcfclk_mhz = 1564.0,
.fabricclk_mhz = 400.0,
.dispclk_mhz = 2150.0,
.dppclk_mhz = 2150.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
.phyclk_d32_mhz = 625.0,
.socclk_mhz = 1200.0,
.dscclk_mhz = 716.667,
.dram_speed_mts = 1600.0,
.dtbclk_mhz = 1564.0,
},
},
.num_states = 1,
.sr_exit_time_us = 5.20,
.sr_enter_plus_exit_time_us = 9.60,
.sr_exit_z8_time_us = 285.0,
.sr_enter_plus_exit_z8_time_us = 320,
.writeback_latency_us = 12.0,
.round_trip_ping_latency_dcfclk_cycles = 263,
.urgent_latency_pixel_data_only_us = 4.0,
.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
.urgent_latency_vm_data_only_us = 4.0,
.fclk_change_latency_us = 20,
.usr_retraining_latency_us = 2,
.smn_latency_us = 2,
.mall_allocated_for_dcn_mbytes = 64,
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
.pct_ideal_sdp_bw_after_urgent = 100.0,
.pct_ideal_fabric_bw_after_urgent = 67.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
.pct_ideal_dram_bw_after_urgent_strobe = 67.0,
.max_avg_sdp_bw_use_normal_percent = 80.0,
.max_avg_fabric_bw_use_normal_percent = 60.0,
.max_avg_dram_bw_use_normal_strobe_percent = 50.0,
.max_avg_dram_bw_use_normal_percent = 15.0,
.num_chans = 8,
.dram_channel_width_bytes = 2,
.fabric_datapath_to_dcn_data_return_bytes = 64,
.return_bus_width_bytes = 64,
.downspread_percent = 0.38,
.dcn_downspread_percent = 0.5,
.dram_clock_change_latency_us = 400,
.dispclk_dppclk_vco_speed_mhz = 4300.0,
.do_urgent_latency_adjustment = true,
.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
};
enum dcn32_clk_src_array_id {
DCN32_CLK_SRC_PLL0,
DCN32_CLK_SRC_PLL1,
......@@ -3454,53 +3323,6 @@ static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
}
}
static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
{
float memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_2_soc.num_chans *
dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
float fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
float sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
float limiting_bw_kbytes_sec = memory_bw_kbytes_sec;
if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec)
limiting_bw_kbytes_sec = fabric_bw_kbytes_sec;
if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec)
limiting_bw_kbytes_sec = sdp_bw_kbytes_sec;
return limiting_bw_kbytes_sec;
}
static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
struct _vcs_dpi_voltage_scaling_st *entry)
{
int index = 0;
int i = 0;
float net_bw_of_new_state = 0;
if (*num_entries == 0) {
table[0] = *entry;
(*num_entries)++;
} else {
net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) {
index++;
if (index >= *num_entries)
break;
}
for (i = *num_entries; i > index; i--) {
table[i] = table[i - 1];
}
table[index] = *entry;
(*num_entries)++;
}
}
static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries,
unsigned int index)
{
......@@ -3585,7 +3407,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = 0;
get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
}
// Insert the max DCFCLK
......@@ -3594,7 +3418,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = 0;
get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
// Insert the UCLK DPMS
for (i = 0; i < num_uclk_dpms; i++) {
......@@ -3603,7 +3429,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
}
// If FCLK is coarse grained, insert individual DPMs.
......@@ -3614,7 +3442,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = 0;
get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
}
}
// If FCLK fine grained, only insert max
......@@ -3624,7 +3454,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
entry.dram_speed_mts = 0;
get_optimal_ntuple(&entry);
DC_FP_START();
insert_entry_into_table_sorted(table, num_entries, &entry);
DC_FP_END();
}
// At this point, the table contains all "points of interest" based on
......
......@@ -33,6 +33,9 @@
#define TO_DCN32_RES_POOL(pool)\
container_of(pool, struct dcn32_resource_pool, base)
extern struct _vcs_dpi_ip_params_st dcn3_2_ip;
extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc;
struct dcn32_resource_pool {
struct resource_pool base;
};
......
......@@ -28,6 +28,132 @@
// We need this includes for WATERMARKS_* defines
#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
struct _vcs_dpi_ip_params_st dcn3_2_ip = {
.gpuvm_enable = 0,
.gpuvm_max_page_table_levels = 4,
.hostvm_enable = 0,
.rob_buffer_size_kbytes = 128,
.det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE,
.config_return_buffer_size_in_kbytes = 1280,
.compressed_buffer_segment_size_in_kbytes = 64,
.meta_fifo_size_in_kentries = 22,
.zero_size_buffer_entries = 512,
.compbuf_reserved_space_64b = 256,
.compbuf_reserved_space_zs = 64,
.dpp_output_buffer_pixels = 2560,
.opp_output_buffer_lines = 1,
.pixel_chunk_size_kbytes = 8,
.alpha_pixel_chunk_size_kbytes = 4,
.min_pixel_chunk_size_bytes = 1024,
.dcc_meta_buffer_size_bytes = 6272,
.meta_chunk_size_kbytes = 2,
.min_meta_chunk_size_bytes = 256,
.writeback_chunk_size_kbytes = 8,
.ptoi_supported = false,
.num_dsc = 4,
.maximum_dsc_bits_per_component = 12,
.maximum_pixels_per_line_per_dsc_unit = 6016,
.dsc422_native_support = true,
.is_line_buffer_bpp_fixed = true,
.line_buffer_fixed_bpp = 57,
.line_buffer_size_bits = 1171920,
.max_line_buffer_lines = 32,
.writeback_interface_buffer_size_kbytes = 90,
.max_num_dpp = 4,
.max_num_otg = 4,
.max_num_hdmi_frl_outputs = 1,
.max_num_wb = 1,
.max_dchub_pscl_bw_pix_per_clk = 4,
.max_pscl_lb_bw_pix_per_clk = 2,
.max_lb_vscl_bw_pix_per_clk = 4,
.max_vscl_hscl_bw_pix_per_clk = 4,
.max_hscl_ratio = 6,
.max_vscl_ratio = 6,
.max_hscl_taps = 8,
.max_vscl_taps = 8,
.dpte_buffer_size_in_pte_reqs_luma = 64,
.dpte_buffer_size_in_pte_reqs_chroma = 34,
.dispclk_ramp_margin_percent = 1,
.max_inter_dcn_tile_repeaters = 8,
.cursor_buffer_size = 16,
.cursor_chunk_size = 2,
.writeback_line_buffer_buffer_size = 0,
.writeback_min_hscl_ratio = 1,
.writeback_min_vscl_ratio = 1,
.writeback_max_hscl_ratio = 1,
.writeback_max_vscl_ratio = 1,
.writeback_max_hscl_taps = 1,
.writeback_max_vscl_taps = 1,
.dppclk_delay_subtotal = 47,
.dppclk_delay_scl = 50,
.dppclk_delay_scl_lb_only = 16,
.dppclk_delay_cnvc_formatter = 28,
.dppclk_delay_cnvc_cursor = 6,
.dispclk_delay_subtotal = 125,
.dynamic_metadata_vm_enabled = false,
.odm_combine_4to1_supported = false,
.dcc_supported = true,
.max_num_dp2p0_outputs = 2,
.max_num_dp2p0_streams = 4,
};
struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
.clock_limits = {
{
.state = 0,
.dcfclk_mhz = 1564.0,
.fabricclk_mhz = 400.0,
.dispclk_mhz = 2150.0,
.dppclk_mhz = 2150.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
.phyclk_d32_mhz = 625.0,
.socclk_mhz = 1200.0,
.dscclk_mhz = 716.667,
.dram_speed_mts = 16000.0,
.dtbclk_mhz = 1564.0,
},
},
.num_states = 1,
.sr_exit_time_us = 5.20,
.sr_enter_plus_exit_time_us = 9.60,
.sr_exit_z8_time_us = 285.0,
.sr_enter_plus_exit_z8_time_us = 320,
.writeback_latency_us = 12.0,
.round_trip_ping_latency_dcfclk_cycles = 263,
.urgent_latency_pixel_data_only_us = 4.0,
.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
.urgent_latency_vm_data_only_us = 4.0,
.fclk_change_latency_us = 20,
.usr_retraining_latency_us = 2,
.smn_latency_us = 2,
.mall_allocated_for_dcn_mbytes = 64,
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
.pct_ideal_sdp_bw_after_urgent = 100.0,
.pct_ideal_fabric_bw_after_urgent = 67.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
.pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
.pct_ideal_dram_bw_after_urgent_strobe = 67.0,
.max_avg_sdp_bw_use_normal_percent = 80.0,
.max_avg_fabric_bw_use_normal_percent = 60.0,
.max_avg_dram_bw_use_normal_strobe_percent = 50.0,
.max_avg_dram_bw_use_normal_percent = 15.0,
.num_chans = 8,
.dram_channel_width_bytes = 2,
.fabric_datapath_to_dcn_data_return_bytes = 64,
.return_bus_width_bytes = 64,
.downspread_percent = 0.38,
.dcn_downspread_percent = 0.5,
.dram_clock_change_latency_us = 400,
.dispclk_dppclk_vco_speed_mhz = 4300.0,
.do_urgent_latency_adjustment = true,
.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
};
void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
{
/* defaults */
......@@ -191,3 +317,63 @@ bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st p
return false;
}
static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
{
float memory_bw_kbytes_sec;
float fabric_bw_kbytes_sec;
float sdp_bw_kbytes_sec;
float limiting_bw_kbytes_sec;
memory_bw_kbytes_sec = entry->dram_speed_mts *
dcn3_2_soc.num_chans *
dcn3_2_soc.dram_channel_width_bytes *
((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
fabric_bw_kbytes_sec = entry->fabricclk_mhz *
dcn3_2_soc.return_bus_width_bytes *
((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100);
sdp_bw_kbytes_sec = entry->dcfclk_mhz *
dcn3_2_soc.return_bus_width_bytes *
((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100);
limiting_bw_kbytes_sec = memory_bw_kbytes_sec;
if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec)
limiting_bw_kbytes_sec = fabric_bw_kbytes_sec;
if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec)
limiting_bw_kbytes_sec = sdp_bw_kbytes_sec;
return limiting_bw_kbytes_sec;
}
void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
unsigned int *num_entries,
struct _vcs_dpi_voltage_scaling_st *entry)
{
int i = 0;
int index = 0;
float net_bw_of_new_state = 0;
dc_assert_fp_enabled();
if (*num_entries == 0) {
table[0] = *entry;
(*num_entries)++;
} else {
net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) {
index++;
if (index >= *num_entries)
break;
}
for (i = *num_entries; i > index; i--)
table[i] = table[i - 1];
table[index] = *entry;
(*num_entries)++;
}
}
......@@ -29,6 +29,11 @@
#include "clk_mgr_internal.h"
#define DCN3_2_DEFAULT_DET_SIZE 256
#define DCN3_2_MAX_DET_SIZE 1152
#define DCN3_2_MIN_DET_SIZE 128
#define DCN3_2_MIN_COMPBUF_SIZE_KB 128
void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr);
void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
......@@ -40,4 +45,8 @@ bool dcn32_predict_pipe_split(struct dc_state *context,
display_pipe_params_st pipe,
int index);
void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
unsigned int *num_entries,
struct _vcs_dpi_voltage_scaling_st *entry);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment