7185 lines
272 KiB
C
7185 lines
272 KiB
C
/*
|
|
* Copyright 2017 Advanced Micro Devices, Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* Authors: AMD
|
|
*
|
|
*/
|
|
|
|
#include "dc.h"
|
|
#include "dc_link.h"
|
|
#include "../display_mode_lib.h"
|
|
#include "../dcn30/display_mode_vba_30.h"
|
|
#include "display_mode_vba_31.h"
|
|
#include "../dml_inline_defs.h"
|
|
|
|
/*
|
|
* NOTE:
|
|
* This file is gcc-parsable HW gospel, coming straight from HW engineers.
|
|
*
|
|
* It doesn't adhere to Linux kernel style and sometimes will do things in odd
|
|
* ways. Unless there is something clearly wrong with it the code should
|
|
* remain as-is as it provides us with a guarantee from HW that it is correct.
|
|
*/
|
|
|
|
#define BPP_INVALID 0
|
|
#define BPP_BLENDED_PIPE 0xffffffff
|
|
#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
|
|
#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
|
|
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
|
|
#define DCN3_15_MAX_DET_SIZE 384
|
|
|
|
// For DML-C changes that haven't been propagated to VBA yet
|
|
//#define __DML_VBA_ALLOW_DELTA__
|
|
|
|
// Move these to ip parameters/constants
|
|
|
|
// The vstartup value at which DML starts checking whether the mode can be supported
|
|
#define __DML_VBA_MIN_VSTARTUP__ 9
|
|
|
|
// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
|
|
#define __DML_ARB_TO_RET_DELAY__ (7 + 95)
|
|
|
|
// fudge factor for min dcfclk calculation
|
|
#define __DML_MIN_DCFCLK_FACTOR__ 1.15
|
|
|
|
typedef struct {
|
|
double DPPCLK;
|
|
double DISPCLK;
|
|
double PixelClock;
|
|
double DCFCLKDeepSleep;
|
|
unsigned int DPPPerPlane;
|
|
bool ScalerEnabled;
|
|
double VRatio;
|
|
double VRatioChroma;
|
|
enum scan_direction_class SourceScan;
|
|
unsigned int BlockWidth256BytesY;
|
|
unsigned int BlockHeight256BytesY;
|
|
unsigned int BlockWidth256BytesC;
|
|
unsigned int BlockHeight256BytesC;
|
|
unsigned int InterlaceEnable;
|
|
unsigned int NumberOfCursors;
|
|
unsigned int VBlank;
|
|
unsigned int HTotal;
|
|
unsigned int DCCEnable;
|
|
bool ODMCombineIsEnabled;
|
|
enum source_format_class SourcePixelFormat;
|
|
int BytePerPixelY;
|
|
int BytePerPixelC;
|
|
bool ProgressiveToInterlaceUnitInOPP;
|
|
} Pipe;
|
|
|
|
#define BPP_INVALID 0
|
|
#define BPP_BLENDED_PIPE 0xffffffff
|
|
|
|
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
|
|
static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
|
|
static unsigned int dscceComputeDelay(
|
|
unsigned int bpc,
|
|
double BPP,
|
|
unsigned int sliceWidth,
|
|
unsigned int numSlices,
|
|
enum output_format_class pixelFormat,
|
|
enum output_encoder_class Output);
|
|
static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
|
|
static bool CalculatePrefetchSchedule(
|
|
struct display_mode_lib *mode_lib,
|
|
double HostVMInefficiencyFactor,
|
|
Pipe *myPipe,
|
|
unsigned int DSCDelay,
|
|
double DPPCLKDelaySubtotalPlusCNVCFormater,
|
|
double DPPCLKDelaySCL,
|
|
double DPPCLKDelaySCLLBOnly,
|
|
double DPPCLKDelayCNVCCursor,
|
|
double DISPCLKDelaySubtotal,
|
|
unsigned int DPP_RECOUT_WIDTH,
|
|
enum output_format_class OutputFormat,
|
|
unsigned int MaxInterDCNTileRepeaters,
|
|
unsigned int VStartup,
|
|
unsigned int MaxVStartup,
|
|
unsigned int GPUVMPageTableLevels,
|
|
bool GPUVMEnable,
|
|
bool HostVMEnable,
|
|
unsigned int HostVMMaxNonCachedPageTableLevels,
|
|
double HostVMMinPageSize,
|
|
bool DynamicMetadataEnable,
|
|
bool DynamicMetadataVMEnabled,
|
|
int DynamicMetadataLinesBeforeActiveRequired,
|
|
unsigned int DynamicMetadataTransmittedBytes,
|
|
double UrgentLatency,
|
|
double UrgentExtraLatency,
|
|
double TCalc,
|
|
unsigned int PDEAndMetaPTEBytesFrame,
|
|
unsigned int MetaRowByte,
|
|
unsigned int PixelPTEBytesPerRow,
|
|
double PrefetchSourceLinesY,
|
|
unsigned int SwathWidthY,
|
|
double VInitPreFillY,
|
|
unsigned int MaxNumSwathY,
|
|
double PrefetchSourceLinesC,
|
|
unsigned int SwathWidthC,
|
|
double VInitPreFillC,
|
|
unsigned int MaxNumSwathC,
|
|
int swath_width_luma_ub,
|
|
int swath_width_chroma_ub,
|
|
unsigned int SwathHeightY,
|
|
unsigned int SwathHeightC,
|
|
double TWait,
|
|
double *DSTXAfterScaler,
|
|
double *DSTYAfterScaler,
|
|
double *DestinationLinesForPrefetch,
|
|
double *PrefetchBandwidth,
|
|
double *DestinationLinesToRequestVMInVBlank,
|
|
double *DestinationLinesToRequestRowInVBlank,
|
|
double *VRatioPrefetchY,
|
|
double *VRatioPrefetchC,
|
|
double *RequiredPrefetchPixDataBWLuma,
|
|
double *RequiredPrefetchPixDataBWChroma,
|
|
bool *NotEnoughTimeForDynamicMetadata,
|
|
double *Tno_bw,
|
|
double *prefetch_vmrow_bw,
|
|
double *Tdmdl_vm,
|
|
double *Tdmdl,
|
|
double *TSetup,
|
|
int *VUpdateOffsetPix,
|
|
double *VUpdateWidthPix,
|
|
double *VReadyOffsetPix);
|
|
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
|
|
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
|
|
static void CalculateDCCConfiguration(
|
|
bool DCCEnabled,
|
|
bool DCCProgrammingAssumesScanDirectionUnknown,
|
|
enum source_format_class SourcePixelFormat,
|
|
unsigned int SurfaceWidthLuma,
|
|
unsigned int SurfaceWidthChroma,
|
|
unsigned int SurfaceHeightLuma,
|
|
unsigned int SurfaceHeightChroma,
|
|
double DETBufferSize,
|
|
unsigned int RequestHeight256ByteLuma,
|
|
unsigned int RequestHeight256ByteChroma,
|
|
enum dm_swizzle_mode TilingFormat,
|
|
unsigned int BytePerPixelY,
|
|
unsigned int BytePerPixelC,
|
|
double BytePerPixelDETY,
|
|
double BytePerPixelDETC,
|
|
enum scan_direction_class ScanOrientation,
|
|
unsigned int *MaxUncompressedBlockLuma,
|
|
unsigned int *MaxUncompressedBlockChroma,
|
|
unsigned int *MaxCompressedBlockLuma,
|
|
unsigned int *MaxCompressedBlockChroma,
|
|
unsigned int *IndependentBlockLuma,
|
|
unsigned int *IndependentBlockChroma);
|
|
static double CalculatePrefetchSourceLines(
|
|
struct display_mode_lib *mode_lib,
|
|
double VRatio,
|
|
double vtaps,
|
|
bool Interlace,
|
|
bool ProgressiveToInterlaceUnitInOPP,
|
|
unsigned int SwathHeight,
|
|
unsigned int ViewportYStart,
|
|
double *VInitPreFill,
|
|
unsigned int *MaxNumSwath);
|
|
static unsigned int CalculateVMAndRowBytes(
|
|
struct display_mode_lib *mode_lib,
|
|
bool DCCEnable,
|
|
unsigned int BlockHeight256Bytes,
|
|
unsigned int BlockWidth256Bytes,
|
|
enum source_format_class SourcePixelFormat,
|
|
unsigned int SurfaceTiling,
|
|
unsigned int BytePerPixel,
|
|
enum scan_direction_class ScanDirection,
|
|
unsigned int SwathWidth,
|
|
unsigned int ViewportHeight,
|
|
bool GPUVMEnable,
|
|
bool HostVMEnable,
|
|
unsigned int HostVMMaxNonCachedPageTableLevels,
|
|
unsigned int GPUVMMinPageSize,
|
|
unsigned int HostVMMinPageSize,
|
|
unsigned int PTEBufferSizeInRequests,
|
|
unsigned int Pitch,
|
|
unsigned int DCCMetaPitch,
|
|
unsigned int *MacroTileWidth,
|
|
unsigned int *MetaRowByte,
|
|
unsigned int *PixelPTEBytesPerRow,
|
|
bool *PTEBufferSizeNotExceeded,
|
|
int *dpte_row_width_ub,
|
|
unsigned int *dpte_row_height,
|
|
unsigned int *MetaRequestWidth,
|
|
unsigned int *MetaRequestHeight,
|
|
unsigned int *meta_row_width,
|
|
unsigned int *meta_row_height,
|
|
int *vm_group_bytes,
|
|
unsigned int *dpte_group_bytes,
|
|
unsigned int *PixelPTEReqWidth,
|
|
unsigned int *PixelPTEReqHeight,
|
|
unsigned int *PTERequestSize,
|
|
int *DPDE0BytesFrame,
|
|
int *MetaPTEBytesFrame);
|
|
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
|
|
static void CalculateRowBandwidth(
|
|
bool GPUVMEnable,
|
|
enum source_format_class SourcePixelFormat,
|
|
double VRatio,
|
|
double VRatioChroma,
|
|
bool DCCEnable,
|
|
double LineTime,
|
|
unsigned int MetaRowByteLuma,
|
|
unsigned int MetaRowByteChroma,
|
|
unsigned int meta_row_height_luma,
|
|
unsigned int meta_row_height_chroma,
|
|
unsigned int PixelPTEBytesPerRowLuma,
|
|
unsigned int PixelPTEBytesPerRowChroma,
|
|
unsigned int dpte_row_height_luma,
|
|
unsigned int dpte_row_height_chroma,
|
|
double *meta_row_bw,
|
|
double *dpte_row_bw);
|
|
|
|
static void CalculateFlipSchedule(
|
|
struct display_mode_lib *mode_lib,
|
|
unsigned int k,
|
|
double HostVMInefficiencyFactor,
|
|
double UrgentExtraLatency,
|
|
double UrgentLatency,
|
|
double PDEAndMetaPTEBytesPerFrame,
|
|
double MetaRowBytes,
|
|
double DPTEBytesPerRow);
|
|
static double CalculateWriteBackDelay(
|
|
enum source_format_class WritebackPixelFormat,
|
|
double WritebackHRatio,
|
|
double WritebackVRatio,
|
|
unsigned int WritebackVTaps,
|
|
int WritebackDestinationWidth,
|
|
int WritebackDestinationHeight,
|
|
int WritebackSourceHeight,
|
|
unsigned int HTotal);
|
|
|
|
static void CalculateVupdateAndDynamicMetadataParameters(
|
|
int MaxInterDCNTileRepeaters,
|
|
double DPPCLK,
|
|
double DISPCLK,
|
|
double DCFClkDeepSleep,
|
|
double PixelClock,
|
|
int HTotal,
|
|
int VBlank,
|
|
int DynamicMetadataTransmittedBytes,
|
|
int DynamicMetadataLinesBeforeActiveRequired,
|
|
int InterlaceEnable,
|
|
bool ProgressiveToInterlaceUnitInOPP,
|
|
double *TSetup,
|
|
double *Tdmbf,
|
|
double *Tdmec,
|
|
double *Tdmsks,
|
|
int *VUpdateOffsetPix,
|
|
double *VUpdateWidthPix,
|
|
double *VReadyOffsetPix);
|
|
|
|
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
|
|
struct display_mode_lib *mode_lib,
|
|
unsigned int PrefetchMode,
|
|
double DCFCLK,
|
|
double ReturnBW,
|
|
double UrgentLatency,
|
|
double ExtraLatency,
|
|
double SOCCLK,
|
|
double DCFCLKDeepSleep,
|
|
unsigned int DETBufferSizeY[],
|
|
unsigned int DETBufferSizeC[],
|
|
unsigned int SwathHeightY[],
|
|
unsigned int SwathHeightC[],
|
|
double SwathWidthY[],
|
|
double SwathWidthC[],
|
|
unsigned int DPPPerPlane[],
|
|
double BytePerPixelDETY[],
|
|
double BytePerPixelDETC[],
|
|
bool UnboundedRequestEnabled,
|
|
int unsigned CompressedBufferSizeInkByte,
|
|
enum clock_change_support *DRAMClockChangeSupport,
|
|
double *StutterExitWatermark,
|
|
double *StutterEnterPlusExitWatermark,
|
|
double *Z8StutterExitWatermark,
|
|
double *Z8StutterEnterPlusExitWatermark);
|
|
|
|
static void CalculateDCFCLKDeepSleep(
|
|
struct display_mode_lib *mode_lib,
|
|
unsigned int NumberOfActivePlanes,
|
|
int BytePerPixelY[],
|
|
int BytePerPixelC[],
|
|
double VRatio[],
|
|
double VRatioChroma[],
|
|
double SwathWidthY[],
|
|
double SwathWidthC[],
|
|
unsigned int DPPPerPlane[],
|
|
double HRatio[],
|
|
double HRatioChroma[],
|
|
double PixelClock[],
|
|
double PSCL_THROUGHPUT[],
|
|
double PSCL_THROUGHPUT_CHROMA[],
|
|
double DPPCLK[],
|
|
double ReadBandwidthLuma[],
|
|
double ReadBandwidthChroma[],
|
|
int ReturnBusWidth,
|
|
double *DCFCLKDeepSleep);
|
|
|
|
static void CalculateUrgentBurstFactor(
|
|
int swath_width_luma_ub,
|
|
int swath_width_chroma_ub,
|
|
unsigned int SwathHeightY,
|
|
unsigned int SwathHeightC,
|
|
double LineTime,
|
|
double UrgentLatency,
|
|
double CursorBufferSize,
|
|
unsigned int CursorWidth,
|
|
unsigned int CursorBPP,
|
|
double VRatio,
|
|
double VRatioC,
|
|
double BytePerPixelInDETY,
|
|
double BytePerPixelInDETC,
|
|
double DETBufferSizeY,
|
|
double DETBufferSizeC,
|
|
double *UrgentBurstFactorCursor,
|
|
double *UrgentBurstFactorLuma,
|
|
double *UrgentBurstFactorChroma,
|
|
bool *NotEnoughUrgentLatencyHiding);
|
|
|
|
static void UseMinimumDCFCLK(
|
|
struct display_mode_lib *mode_lib,
|
|
int MaxPrefetchMode,
|
|
int ReorderingBytes);
|
|
|
|
static void CalculatePixelDeliveryTimes(
|
|
unsigned int NumberOfActivePlanes,
|
|
double VRatio[],
|
|
double VRatioChroma[],
|
|
double VRatioPrefetchY[],
|
|
double VRatioPrefetchC[],
|
|
unsigned int swath_width_luma_ub[],
|
|
unsigned int swath_width_chroma_ub[],
|
|
unsigned int DPPPerPlane[],
|
|
double HRatio[],
|
|
double HRatioChroma[],
|
|
double PixelClock[],
|
|
double PSCL_THROUGHPUT[],
|
|
double PSCL_THROUGHPUT_CHROMA[],
|
|
double DPPCLK[],
|
|
int BytePerPixelC[],
|
|
enum scan_direction_class SourceScan[],
|
|
unsigned int NumberOfCursors[],
|
|
unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
|
|
unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
|
|
unsigned int BlockWidth256BytesY[],
|
|
unsigned int BlockHeight256BytesY[],
|
|
unsigned int BlockWidth256BytesC[],
|
|
unsigned int BlockHeight256BytesC[],
|
|
double DisplayPipeLineDeliveryTimeLuma[],
|
|
double DisplayPipeLineDeliveryTimeChroma[],
|
|
double DisplayPipeLineDeliveryTimeLumaPrefetch[],
|
|
double DisplayPipeLineDeliveryTimeChromaPrefetch[],
|
|
double DisplayPipeRequestDeliveryTimeLuma[],
|
|
double DisplayPipeRequestDeliveryTimeChroma[],
|
|
double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
|
|
double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
|
|
double CursorRequestDeliveryTime[],
|
|
double CursorRequestDeliveryTimePrefetch[]);
|
|
|
|
static void CalculateMetaAndPTETimes(
|
|
int NumberOfActivePlanes,
|
|
bool GPUVMEnable,
|
|
int MetaChunkSize,
|
|
int MinMetaChunkSizeBytes,
|
|
int HTotal[],
|
|
double VRatio[],
|
|
double VRatioChroma[],
|
|
double DestinationLinesToRequestRowInVBlank[],
|
|
double DestinationLinesToRequestRowInImmediateFlip[],
|
|
bool DCCEnable[],
|
|
double PixelClock[],
|
|
int BytePerPixelY[],
|
|
int BytePerPixelC[],
|
|
enum scan_direction_class SourceScan[],
|
|
int dpte_row_height[],
|
|
int dpte_row_height_chroma[],
|
|
int meta_row_width[],
|
|
int meta_row_width_chroma[],
|
|
int meta_row_height[],
|
|
int meta_row_height_chroma[],
|
|
int meta_req_width[],
|
|
int meta_req_width_chroma[],
|
|
int meta_req_height[],
|
|
int meta_req_height_chroma[],
|
|
int dpte_group_bytes[],
|
|
int PTERequestSizeY[],
|
|
int PTERequestSizeC[],
|
|
int PixelPTEReqWidthY[],
|
|
int PixelPTEReqHeightY[],
|
|
int PixelPTEReqWidthC[],
|
|
int PixelPTEReqHeightC[],
|
|
int dpte_row_width_luma_ub[],
|
|
int dpte_row_width_chroma_ub[],
|
|
double DST_Y_PER_PTE_ROW_NOM_L[],
|
|
double DST_Y_PER_PTE_ROW_NOM_C[],
|
|
double DST_Y_PER_META_ROW_NOM_L[],
|
|
double DST_Y_PER_META_ROW_NOM_C[],
|
|
double TimePerMetaChunkNominal[],
|
|
double TimePerChromaMetaChunkNominal[],
|
|
double TimePerMetaChunkVBlank[],
|
|
double TimePerChromaMetaChunkVBlank[],
|
|
double TimePerMetaChunkFlip[],
|
|
double TimePerChromaMetaChunkFlip[],
|
|
double time_per_pte_group_nom_luma[],
|
|
double time_per_pte_group_vblank_luma[],
|
|
double time_per_pte_group_flip_luma[],
|
|
double time_per_pte_group_nom_chroma[],
|
|
double time_per_pte_group_vblank_chroma[],
|
|
double time_per_pte_group_flip_chroma[]);
|
|
|
|
static void CalculateVMGroupAndRequestTimes(
|
|
unsigned int NumberOfActivePlanes,
|
|
bool GPUVMEnable,
|
|
unsigned int GPUVMMaxPageTableLevels,
|
|
unsigned int HTotal[],
|
|
int BytePerPixelC[],
|
|
double DestinationLinesToRequestVMInVBlank[],
|
|
double DestinationLinesToRequestVMInImmediateFlip[],
|
|
bool DCCEnable[],
|
|
double PixelClock[],
|
|
int dpte_row_width_luma_ub[],
|
|
int dpte_row_width_chroma_ub[],
|
|
int vm_group_bytes[],
|
|
unsigned int dpde0_bytes_per_frame_ub_l[],
|
|
unsigned int dpde0_bytes_per_frame_ub_c[],
|
|
int meta_pte_bytes_per_frame_ub_l[],
|
|
int meta_pte_bytes_per_frame_ub_c[],
|
|
double TimePerVMGroupVBlank[],
|
|
double TimePerVMGroupFlip[],
|
|
double TimePerVMRequestVBlank[],
|
|
double TimePerVMRequestFlip[]);
|
|
|
|
static void CalculateStutterEfficiency(
|
|
struct display_mode_lib *mode_lib,
|
|
int CompressedBufferSizeInkByte,
|
|
bool UnboundedRequestEnabled,
|
|
int ConfigReturnBufferSizeInKByte,
|
|
int MetaFIFOSizeInKEntries,
|
|
int ZeroSizeBufferEntries,
|
|
int NumberOfActivePlanes,
|
|
int ROBBufferSizeInKByte,
|
|
double TotalDataReadBandwidth,
|
|
double DCFCLK,
|
|
double ReturnBW,
|
|
double COMPBUF_RESERVED_SPACE_64B,
|
|
double COMPBUF_RESERVED_SPACE_ZS,
|
|
double SRExitTime,
|
|
double SRExitZ8Time,
|
|
bool SynchronizedVBlank,
|
|
double Z8StutterEnterPlusExitWatermark,
|
|
double StutterEnterPlusExitWatermark,
|
|
bool ProgressiveToInterlaceUnitInOPP,
|
|
bool Interlace[],
|
|
double MinTTUVBlank[],
|
|
int DPPPerPlane[],
|
|
unsigned int DETBufferSizeY[],
|
|
int BytePerPixelY[],
|
|
double BytePerPixelDETY[],
|
|
double SwathWidthY[],
|
|
int SwathHeightY[],
|
|
int SwathHeightC[],
|
|
double NetDCCRateLuma[],
|
|
double NetDCCRateChroma[],
|
|
double DCCFractionOfZeroSizeRequestsLuma[],
|
|
double DCCFractionOfZeroSizeRequestsChroma[],
|
|
int HTotal[],
|
|
int VTotal[],
|
|
double PixelClock[],
|
|
double VRatio[],
|
|
enum scan_direction_class SourceScan[],
|
|
int BlockHeight256BytesY[],
|
|
int BlockWidth256BytesY[],
|
|
int BlockHeight256BytesC[],
|
|
int BlockWidth256BytesC[],
|
|
int DCCYMaxUncompressedBlock[],
|
|
int DCCCMaxUncompressedBlock[],
|
|
int VActive[],
|
|
bool DCCEnable[],
|
|
bool WritebackEnable[],
|
|
double ReadBandwidthPlaneLuma[],
|
|
double ReadBandwidthPlaneChroma[],
|
|
double meta_row_bw[],
|
|
double dpte_row_bw[],
|
|
double *StutterEfficiencyNotIncludingVBlank,
|
|
double *StutterEfficiency,
|
|
int *NumberOfStutterBurstsPerFrame,
|
|
double *Z8StutterEfficiencyNotIncludingVBlank,
|
|
double *Z8StutterEfficiency,
|
|
int *Z8NumberOfStutterBurstsPerFrame,
|
|
double *StutterPeriod);
|
|
|
|
static void CalculateSwathAndDETConfiguration(
|
|
bool ForceSingleDPP,
|
|
int NumberOfActivePlanes,
|
|
bool DETSharedByAllDPP,
|
|
unsigned int DETBufferSizeInKByte[],
|
|
double MaximumSwathWidthLuma[],
|
|
double MaximumSwathWidthChroma[],
|
|
enum scan_direction_class SourceScan[],
|
|
enum source_format_class SourcePixelFormat[],
|
|
enum dm_swizzle_mode SurfaceTiling[],
|
|
int ViewportWidth[],
|
|
int ViewportHeight[],
|
|
int SurfaceWidthY[],
|
|
int SurfaceWidthC[],
|
|
int SurfaceHeightY[],
|
|
int SurfaceHeightC[],
|
|
int Read256BytesBlockHeightY[],
|
|
int Read256BytesBlockHeightC[],
|
|
int Read256BytesBlockWidthY[],
|
|
int Read256BytesBlockWidthC[],
|
|
enum odm_combine_mode ODMCombineEnabled[],
|
|
int BlendingAndTiming[],
|
|
int BytePerPixY[],
|
|
int BytePerPixC[],
|
|
double BytePerPixDETY[],
|
|
double BytePerPixDETC[],
|
|
int HActive[],
|
|
double HRatio[],
|
|
double HRatioChroma[],
|
|
int DPPPerPlane[],
|
|
int swath_width_luma_ub[],
|
|
int swath_width_chroma_ub[],
|
|
double SwathWidth[],
|
|
double SwathWidthChroma[],
|
|
int SwathHeightY[],
|
|
int SwathHeightC[],
|
|
unsigned int DETBufferSizeY[],
|
|
unsigned int DETBufferSizeC[],
|
|
bool ViewportSizeSupportPerPlane[],
|
|
bool *ViewportSizeSupport);
|
|
static void CalculateSwathWidth(
|
|
bool ForceSingleDPP,
|
|
int NumberOfActivePlanes,
|
|
enum source_format_class SourcePixelFormat[],
|
|
enum scan_direction_class SourceScan[],
|
|
int ViewportWidth[],
|
|
int ViewportHeight[],
|
|
int SurfaceWidthY[],
|
|
int SurfaceWidthC[],
|
|
int SurfaceHeightY[],
|
|
int SurfaceHeightC[],
|
|
enum odm_combine_mode ODMCombineEnabled[],
|
|
int BytePerPixY[],
|
|
int BytePerPixC[],
|
|
int Read256BytesBlockHeightY[],
|
|
int Read256BytesBlockHeightC[],
|
|
int Read256BytesBlockWidthY[],
|
|
int Read256BytesBlockWidthC[],
|
|
int BlendingAndTiming[],
|
|
int HActive[],
|
|
double HRatio[],
|
|
int DPPPerPlane[],
|
|
double SwathWidthSingleDPPY[],
|
|
double SwathWidthSingleDPPC[],
|
|
double SwathWidthY[],
|
|
double SwathWidthC[],
|
|
int MaximumSwathHeightY[],
|
|
int MaximumSwathHeightC[],
|
|
int swath_width_luma_ub[],
|
|
int swath_width_chroma_ub[]);
|
|
|
|
static double CalculateExtraLatency(
|
|
int RoundTripPingLatencyCycles,
|
|
int ReorderingBytes,
|
|
double DCFCLK,
|
|
int TotalNumberOfActiveDPP,
|
|
int PixelChunkSizeInKByte,
|
|
int TotalNumberOfDCCActiveDPP,
|
|
int MetaChunkSize,
|
|
double ReturnBW,
|
|
bool GPUVMEnable,
|
|
bool HostVMEnable,
|
|
int NumberOfActivePlanes,
|
|
int NumberOfDPP[],
|
|
int dpte_group_bytes[],
|
|
double HostVMInefficiencyFactor,
|
|
double HostVMMinPageSize,
|
|
int HostVMMaxNonCachedPageTableLevels);
|
|
|
|
static double CalculateExtraLatencyBytes(
|
|
int ReorderingBytes,
|
|
int TotalNumberOfActiveDPP,
|
|
int PixelChunkSizeInKByte,
|
|
int TotalNumberOfDCCActiveDPP,
|
|
int MetaChunkSize,
|
|
bool GPUVMEnable,
|
|
bool HostVMEnable,
|
|
int NumberOfActivePlanes,
|
|
int NumberOfDPP[],
|
|
int dpte_group_bytes[],
|
|
double HostVMInefficiencyFactor,
|
|
double HostVMMinPageSize,
|
|
int HostVMMaxNonCachedPageTableLevels);
|
|
|
|
static double CalculateUrgentLatency(
|
|
double UrgentLatencyPixelDataOnly,
|
|
double UrgentLatencyPixelMixedWithVMData,
|
|
double UrgentLatencyVMDataOnly,
|
|
bool DoUrgentLatencyAdjustment,
|
|
double UrgentLatencyAdjustmentFabricClockComponent,
|
|
double UrgentLatencyAdjustmentFabricClockReference,
|
|
double FabricClockSingle);
|
|
|
|
static void CalculateUnboundedRequestAndCompressedBufferSize(
|
|
unsigned int DETBufferSizeInKByte,
|
|
int ConfigReturnBufferSizeInKByte,
|
|
enum unbounded_requesting_policy UseUnboundedRequestingFinal,
|
|
int TotalActiveDPP,
|
|
bool NoChromaPlanes,
|
|
int MaxNumDPP,
|
|
int CompressedBufferSegmentSizeInkByteFinal,
|
|
enum output_encoder_class *Output,
|
|
bool *UnboundedRequestEnabled,
|
|
int *CompressedBufferSizeInkByte);
|
|
|
|
static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
|
|
|
|
/*
 * dml31_recalculate - rerun the dml31 calculation sequence on @mode_lib.
 *
 * The stages are invoked strictly in this order, each receiving the same
 * mode_lib pointer:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * When __DML_VBA_DEBUG__ is defined, a trace line is printed just before
 * the final stage.  Nothing is returned.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	/* Stages 1-3. */
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	/* Stage 4. */
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
|
|
|
|
static unsigned int dscceComputeDelay(
|
|
unsigned int bpc,
|
|
double BPP,
|
|
unsigned int sliceWidth,
|
|
unsigned int numSlices,
|
|
enum output_format_class pixelFormat,
|
|
enum output_encoder_class Output)
|
|
{
|
|
// valid bpc = source bits per component in the set of {8, 10, 12}
|
|
// valid bpp = increments of 1/16 of a bit
|
|
// min = 6/7/8 in N420/N422/444, respectively
|
|
// max = such that compression is 1:1
|
|
//valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
|
|
//valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
|
|
//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
|
|
|
|
// fixed value
|
|
unsigned int rcModelSize = 8192;
|
|
|
|
// N422/N420 operate at 2 pixels per clock
|
|
unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
|
|
|
|
if (pixelFormat == dm_420)
|
|
pixelsPerClock = 2;
|
|
else if (pixelFormat == dm_444)
|
|
pixelsPerClock = 1;
|
|
else if (pixelFormat == dm_n422)
|
|
pixelsPerClock = 2;
|
|
// #all other modes operate at 1 pixel per clock
|
|
else
|
|
pixelsPerClock = 1;
|
|
|
|
//initial transmit delay as per PPS
|
|
initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
|
|
|
|
//compute ssm delay
|
|
if (bpc == 8)
|
|
D = 81;
|
|
else if (bpc == 10)
|
|
D = 89;
|
|
else
|
|
D = 113;
|
|
|
|
//divide by pixel per cycle to compute slice width as seen by DSC
|
|
w = sliceWidth / pixelsPerClock;
|
|
|
|
//422 mode has an additional cycle of delay
|
|
if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
|
|
s = 0;
|
|
else
|
|
s = 1;
|
|
|
|
//main calculation for the dscce
|
|
ix = initalXmitDelay + 45;
|
|
wx = (w + 2) / 3;
|
|
P = 3 * wx - w;
|
|
l0 = ix / w;
|
|
a = ix + P * l0;
|
|
ax = (a + 2) / 3 + D + 6 + 1;
|
|
L = (ax + wx - 1) / wx;
|
|
if ((ix % w) == 0 && P != 0)
|
|
lstall = 1;
|
|
else
|
|
lstall = 0;
|
|
Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
|
|
|
|
//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
|
|
pixels = Delay * 3 * pixelsPerClock;
|
|
return pixels;
|
|
}
|
|
|
|
static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
|
|
{
|
|
unsigned int Delay = 0;
|
|
|
|
if (pixelFormat == dm_420) {
|
|
// sfr
|
|
Delay = Delay + 2;
|
|
// dsccif
|
|
Delay = Delay + 0;
|
|
// dscc - input deserializer
|
|
Delay = Delay + 3;
|
|
// dscc gets pixels every other cycle
|
|
Delay = Delay + 2;
|
|
// dscc - input cdc fifo
|
|
Delay = Delay + 12;
|
|
// dscc gets pixels every other cycle
|
|
Delay = Delay + 13;
|
|
// dscc - cdc uncertainty
|
|
Delay = Delay + 2;
|
|
// dscc - output cdc fifo
|
|
Delay = Delay + 7;
|
|
// dscc gets pixels every other cycle
|
|
Delay = Delay + 3;
|
|
// dscc - cdc uncertainty
|
|
Delay = Delay + 2;
|
|
// dscc - output serializer
|
|
Delay = Delay + 1;
|
|
// sft
|
|
Delay = Delay + 1;
|
|
} else if (pixelFormat == dm_n422) {
|
|
// sfr
|
|
Delay = Delay + 2;
|
|
// dsccif
|
|
Delay = Delay + 1;
|
|
// dscc - input deserializer
|
|
Delay = Delay + 5;
|
|
// dscc - input cdc fifo
|
|
Delay = Delay + 25;
|
|
// dscc - cdc uncertainty
|
|
Delay = Delay + 2;
|
|
// dscc - output cdc fifo
|
|
Delay = Delay + 10;
|
|
// dscc - cdc uncertainty
|
|
Delay = Delay + 2;
|
|
// dscc - output serializer
|
|
Delay = Delay + 1;
|
|
// sft
|
|
Delay = Delay + 1;
|
|
} else {
|
|
// sfr
|
|
Delay = Delay + 2;
|
|
// dsccif
|
|
Delay = Delay + 0;
|
|
// dscc - input deserializer
|
|
Delay = Delay + 3;
|
|
// dscc - input cdc fifo
|
|
Delay = Delay + 12;
|
|
// dscc - cdc uncertainty
|
|
Delay = Delay + 2;
|
|
// dscc - output cdc fifo
|
|
Delay = Delay + 7;
|
|
// dscc - output serializer
|
|
Delay = Delay + 1;
|
|
// dscc - cdc uncertainty
|
|
Delay = Delay + 2;
|
|
// sft
|
|
Delay = Delay + 1;
|
|
}
|
|
|
|
return Delay;
|
|
}
|
|
|
|
static bool CalculatePrefetchSchedule(
|
|
struct display_mode_lib *mode_lib,
|
|
double HostVMInefficiencyFactor,
|
|
Pipe *myPipe,
|
|
unsigned int DSCDelay,
|
|
double DPPCLKDelaySubtotalPlusCNVCFormater,
|
|
double DPPCLKDelaySCL,
|
|
double DPPCLKDelaySCLLBOnly,
|
|
double DPPCLKDelayCNVCCursor,
|
|
double DISPCLKDelaySubtotal,
|
|
unsigned int DPP_RECOUT_WIDTH,
|
|
enum output_format_class OutputFormat,
|
|
unsigned int MaxInterDCNTileRepeaters,
|
|
unsigned int VStartup,
|
|
unsigned int MaxVStartup,
|
|
unsigned int GPUVMPageTableLevels,
|
|
bool GPUVMEnable,
|
|
bool HostVMEnable,
|
|
unsigned int HostVMMaxNonCachedPageTableLevels,
|
|
double HostVMMinPageSize,
|
|
bool DynamicMetadataEnable,
|
|
bool DynamicMetadataVMEnabled,
|
|
int DynamicMetadataLinesBeforeActiveRequired,
|
|
unsigned int DynamicMetadataTransmittedBytes,
|
|
double UrgentLatency,
|
|
double UrgentExtraLatency,
|
|
double TCalc,
|
|
unsigned int PDEAndMetaPTEBytesFrame,
|
|
unsigned int MetaRowByte,
|
|
unsigned int PixelPTEBytesPerRow,
|
|
double PrefetchSourceLinesY,
|
|
unsigned int SwathWidthY,
|
|
double VInitPreFillY,
|
|
unsigned int MaxNumSwathY,
|
|
double PrefetchSourceLinesC,
|
|
unsigned int SwathWidthC,
|
|
double VInitPreFillC,
|
|
unsigned int MaxNumSwathC,
|
|
int swath_width_luma_ub,
|
|
int swath_width_chroma_ub,
|
|
unsigned int SwathHeightY,
|
|
unsigned int SwathHeightC,
|
|
double TWait,
|
|
double *DSTXAfterScaler,
|
|
double *DSTYAfterScaler,
|
|
double *DestinationLinesForPrefetch,
|
|
double *PrefetchBandwidth,
|
|
double *DestinationLinesToRequestVMInVBlank,
|
|
double *DestinationLinesToRequestRowInVBlank,
|
|
double *VRatioPrefetchY,
|
|
double *VRatioPrefetchC,
|
|
double *RequiredPrefetchPixDataBWLuma,
|
|
double *RequiredPrefetchPixDataBWChroma,
|
|
bool *NotEnoughTimeForDynamicMetadata,
|
|
double *Tno_bw,
|
|
double *prefetch_vmrow_bw,
|
|
double *Tdmdl_vm,
|
|
double *Tdmdl,
|
|
double *TSetup,
|
|
int *VUpdateOffsetPix,
|
|
double *VUpdateWidthPix,
|
|
double *VReadyOffsetPix)
|
|
{
|
|
bool MyError = false;
|
|
unsigned int DPPCycles, DISPCLKCycles;
|
|
double DSTTotalPixelsAfterScaler;
|
|
double LineTime;
|
|
double dst_y_prefetch_equ;
|
|
double Tsw_oto;
|
|
double prefetch_bw_oto;
|
|
double prefetch_bw_pr;
|
|
double Tvm_oto;
|
|
double Tr0_oto;
|
|
double Tvm_oto_lines;
|
|
double Tr0_oto_lines;
|
|
double dst_y_prefetch_oto;
|
|
double TimeForFetchingMetaPTE = 0;
|
|
double TimeForFetchingRowInVBlank = 0;
|
|
double LinesToRequestPrefetchPixelData = 0;
|
|
unsigned int HostVMDynamicLevelsTrips;
|
|
double trip_to_mem;
|
|
double Tvm_trips;
|
|
double Tr0_trips;
|
|
double Tvm_trips_rounded;
|
|
double Tr0_trips_rounded;
|
|
double Lsw_oto;
|
|
double Tpre_rounded;
|
|
double prefetch_bw_equ;
|
|
double Tvm_equ;
|
|
double Tr0_equ;
|
|
double Tdmbf;
|
|
double Tdmec;
|
|
double Tdmsks;
|
|
double prefetch_sw_bytes;
|
|
double bytes_pp;
|
|
double dep_bytes;
|
|
int max_vratio_pre = 4;
|
|
double min_Lsw;
|
|
double Tsw_est1 = 0;
|
|
double Tsw_est3 = 0;
|
|
double max_Tsw = 0;
|
|
|
|
if (GPUVMEnable == true && HostVMEnable == true) {
|
|
HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
|
|
} else {
|
|
HostVMDynamicLevelsTrips = 0;
|
|
}
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
|
|
#endif
|
|
CalculateVupdateAndDynamicMetadataParameters(
|
|
MaxInterDCNTileRepeaters,
|
|
myPipe->DPPCLK,
|
|
myPipe->DISPCLK,
|
|
myPipe->DCFCLKDeepSleep,
|
|
myPipe->PixelClock,
|
|
myPipe->HTotal,
|
|
myPipe->VBlank,
|
|
DynamicMetadataTransmittedBytes,
|
|
DynamicMetadataLinesBeforeActiveRequired,
|
|
myPipe->InterlaceEnable,
|
|
myPipe->ProgressiveToInterlaceUnitInOPP,
|
|
TSetup,
|
|
&Tdmbf,
|
|
&Tdmec,
|
|
&Tdmsks,
|
|
VUpdateOffsetPix,
|
|
VUpdateWidthPix,
|
|
VReadyOffsetPix);
|
|
|
|
LineTime = myPipe->HTotal / myPipe->PixelClock;
|
|
trip_to_mem = UrgentLatency;
|
|
Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
|
|
|
|
#ifdef __DML_VBA_ALLOW_DELTA__
|
|
if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
|
|
#else
|
|
if (DynamicMetadataVMEnabled == true) {
|
|
#endif
|
|
*Tdmdl = TWait + Tvm_trips + trip_to_mem;
|
|
} else {
|
|
*Tdmdl = TWait + UrgentExtraLatency;
|
|
}
|
|
|
|
#ifdef __DML_VBA_ALLOW_DELTA__
|
|
if (DynamicMetadataEnable == false) {
|
|
*Tdmdl = 0.0;
|
|
}
|
|
#endif
|
|
|
|
if (DynamicMetadataEnable == true) {
|
|
if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
|
|
*NotEnoughTimeForDynamicMetadata = true;
|
|
dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
|
|
dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
|
|
dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
|
|
dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
|
|
dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
|
|
} else {
|
|
*NotEnoughTimeForDynamicMetadata = false;
|
|
}
|
|
} else {
|
|
*NotEnoughTimeForDynamicMetadata = false;
|
|
}
|
|
|
|
*Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
|
|
|
|
if (myPipe->ScalerEnabled)
|
|
DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
|
|
else
|
|
DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
|
|
|
|
DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
|
|
|
|
DISPCLKCycles = DISPCLKDelaySubtotal;
|
|
|
|
if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
|
|
return true;
|
|
|
|
*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
|
|
dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
|
|
dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
|
|
dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
|
|
dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
|
|
dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
|
|
dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
|
|
dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
|
|
#endif
|
|
|
|
*DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
|
|
|
|
if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
|
|
*DSTYAfterScaler = 1;
|
|
else
|
|
*DSTYAfterScaler = 0;
|
|
|
|
DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
|
|
*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
|
|
*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
|
|
#endif
|
|
|
|
MyError = false;
|
|
|
|
Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
|
|
Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
|
|
Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
|
|
|
|
#ifdef __DML_VBA_ALLOW_DELTA__
|
|
if (!myPipe->DCCEnable) {
|
|
Tr0_trips = 0.0;
|
|
Tr0_trips_rounded = 0.0;
|
|
}
|
|
#endif
|
|
|
|
if (!GPUVMEnable) {
|
|
Tvm_trips = 0.0;
|
|
Tvm_trips_rounded = 0.0;
|
|
}
|
|
|
|
if (GPUVMEnable) {
|
|
if (GPUVMPageTableLevels >= 3) {
|
|
*Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
|
|
} else {
|
|
*Tno_bw = 0;
|
|
}
|
|
} else if (!myPipe->DCCEnable) {
|
|
*Tno_bw = LineTime;
|
|
} else {
|
|
*Tno_bw = LineTime / 4;
|
|
}
|
|
|
|
if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
|
|
bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
|
|
else
|
|
bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
|
|
/*rev 99*/
|
|
prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
|
|
max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
|
|
prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
|
|
prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
|
|
prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
|
|
|
|
min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
|
|
Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
|
|
Tsw_oto = Lsw_oto * LineTime;
|
|
|
|
prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML: HTotal: %d\n", myPipe->HTotal);
|
|
dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
|
|
dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
|
|
dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
|
|
dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
|
|
dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
|
|
#endif
|
|
|
|
if (GPUVMEnable == true)
|
|
Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
|
|
else
|
|
Tvm_oto = LineTime / 4.0;
|
|
|
|
if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
|
|
Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
|
|
LineTime - Tvm_oto,
|
|
LineTime / 4);
|
|
} else {
|
|
Tr0_oto = (LineTime - Tvm_oto) / 2.0;
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
|
|
dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
|
|
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
|
|
dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
|
|
dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
|
|
dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
|
|
dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
|
|
dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
|
|
dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
|
|
#endif
|
|
|
|
Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
|
|
Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
|
|
dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
|
|
dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
|
|
dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
|
|
Tpre_rounded = dst_y_prefetch_equ * LineTime;
|
|
|
|
dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
|
|
|
|
if (prefetch_sw_bytes < dep_bytes)
|
|
prefetch_sw_bytes = 2 * dep_bytes;
|
|
|
|
dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
|
|
dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
|
|
dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
|
|
dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
|
|
dml_print("DML: LineTime: %f\n", LineTime);
|
|
dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
|
|
|
|
dml_print("DML: LineTime: %f\n", LineTime);
|
|
dml_print("DML: VStartup: %d\n", VStartup);
|
|
dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
|
|
dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
|
|
dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
|
|
dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
|
|
dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
|
|
dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
|
|
dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
|
|
dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
|
|
dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
|
|
dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
|
|
dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
|
|
|
|
*PrefetchBandwidth = 0;
|
|
*DestinationLinesToRequestVMInVBlank = 0;
|
|
*DestinationLinesToRequestRowInVBlank = 0;
|
|
*VRatioPrefetchY = 0;
|
|
*VRatioPrefetchC = 0;
|
|
*RequiredPrefetchPixDataBWLuma = 0;
|
|
if (dst_y_prefetch_equ > 1) {
|
|
double PrefetchBandwidth1;
|
|
double PrefetchBandwidth2;
|
|
double PrefetchBandwidth3;
|
|
double PrefetchBandwidth4;
|
|
|
|
if (Tpre_rounded - *Tno_bw > 0) {
|
|
PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
|
|
+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
|
|
Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
|
|
} else {
|
|
PrefetchBandwidth1 = 0;
|
|
}
|
|
|
|
if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
|
|
PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
|
|
/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
|
|
}
|
|
|
|
if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
|
|
PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
|
|
else
|
|
PrefetchBandwidth2 = 0;
|
|
|
|
if (Tpre_rounded - Tvm_trips_rounded > 0) {
|
|
PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
|
|
+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
|
|
Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
|
|
} else {
|
|
PrefetchBandwidth3 = 0;
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
|
|
dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
|
|
dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
|
|
#endif
|
|
if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
|
|
PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
|
|
/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
|
|
}
|
|
|
|
if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
|
|
PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
|
|
else
|
|
PrefetchBandwidth4 = 0;
|
|
|
|
{
|
|
bool Case1OK;
|
|
bool Case2OK;
|
|
bool Case3OK;
|
|
|
|
if (PrefetchBandwidth1 > 0) {
|
|
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
|
|
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
|
|
Case1OK = true;
|
|
} else {
|
|
Case1OK = false;
|
|
}
|
|
} else {
|
|
Case1OK = false;
|
|
}
|
|
|
|
if (PrefetchBandwidth2 > 0) {
|
|
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
|
|
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
|
|
Case2OK = true;
|
|
} else {
|
|
Case2OK = false;
|
|
}
|
|
} else {
|
|
Case2OK = false;
|
|
}
|
|
|
|
if (PrefetchBandwidth3 > 0) {
|
|
if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
|
|
&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
|
|
Case3OK = true;
|
|
} else {
|
|
Case3OK = false;
|
|
}
|
|
} else {
|
|
Case3OK = false;
|
|
}
|
|
|
|
if (Case1OK) {
|
|
prefetch_bw_equ = PrefetchBandwidth1;
|
|
} else if (Case2OK) {
|
|
prefetch_bw_equ = PrefetchBandwidth2;
|
|
} else if (Case3OK) {
|
|
prefetch_bw_equ = PrefetchBandwidth3;
|
|
} else {
|
|
prefetch_bw_equ = PrefetchBandwidth4;
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
|
|
dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
|
|
dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
|
|
dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
|
|
#endif
|
|
|
|
if (prefetch_bw_equ > 0) {
|
|
if (GPUVMEnable == true) {
|
|
Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
|
|
} else {
|
|
Tvm_equ = LineTime / 4;
|
|
}
|
|
|
|
if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
|
|
Tr0_equ = dml_max4(
|
|
(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
|
|
Tr0_trips,
|
|
(LineTime - Tvm_equ) / 2,
|
|
LineTime / 4);
|
|
} else {
|
|
Tr0_equ = (LineTime - Tvm_equ) / 2;
|
|
}
|
|
} else {
|
|
Tvm_equ = 0;
|
|
Tr0_equ = 0;
|
|
dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
|
|
}
|
|
}
|
|
|
|
if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
|
|
*DestinationLinesForPrefetch = dst_y_prefetch_oto;
|
|
TimeForFetchingMetaPTE = Tvm_oto;
|
|
TimeForFetchingRowInVBlank = Tr0_oto;
|
|
*PrefetchBandwidth = prefetch_bw_oto;
|
|
} else {
|
|
*DestinationLinesForPrefetch = dst_y_prefetch_equ;
|
|
TimeForFetchingMetaPTE = Tvm_equ;
|
|
TimeForFetchingRowInVBlank = Tr0_equ;
|
|
*PrefetchBandwidth = prefetch_bw_equ;
|
|
}
|
|
|
|
*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
|
|
|
|
*DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
|
|
|
|
#ifdef __DML_VBA_ALLOW_DELTA__
|
|
LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
|
|
// See note above dated 5/30/2018
|
|
// - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
|
|
- ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
|
|
#else
|
|
LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
|
|
#endif
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
|
|
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
|
|
dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
|
|
dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
|
|
dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
|
|
dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
|
|
dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
|
|
#endif
|
|
|
|
if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
|
|
|
|
*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
|
|
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
|
|
dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
|
|
dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
|
|
#endif
|
|
if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
|
|
if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
|
|
*VRatioPrefetchY = dml_max(
|
|
(double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
|
|
(double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
|
|
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
|
|
} else {
|
|
MyError = true;
|
|
dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
|
|
*VRatioPrefetchY = 0;
|
|
}
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
|
|
dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
|
|
dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
|
|
#endif
|
|
}
|
|
|
|
*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
|
|
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
|
|
dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
|
|
dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
|
|
#endif
|
|
if ((SwathHeightC > 4) || VInitPreFillC > 3) {
|
|
if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
|
|
*VRatioPrefetchC = dml_max(
|
|
*VRatioPrefetchC,
|
|
(double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
|
|
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
|
|
} else {
|
|
MyError = true;
|
|
dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
|
|
*VRatioPrefetchC = 0;
|
|
}
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
|
|
dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
|
|
dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
|
|
#endif
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
|
|
dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
|
|
dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
|
|
#endif
|
|
|
|
*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
|
|
#endif
|
|
|
|
*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
|
|
/ LineTime;
|
|
} else {
|
|
MyError = true;
|
|
dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
|
|
dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
|
|
*VRatioPrefetchY = 0;
|
|
*VRatioPrefetchC = 0;
|
|
*RequiredPrefetchPixDataBWLuma = 0;
|
|
*RequiredPrefetchPixDataBWChroma = 0;
|
|
}
|
|
|
|
dml_print(
|
|
"DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
|
|
(double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
|
|
dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
|
|
dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
|
|
dml_print(
|
|
"DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
|
|
(double) LinesToRequestPrefetchPixelData * LineTime);
|
|
dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
|
|
(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
|
|
(double) myPipe->HTotal)) * LineTime);
|
|
dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
|
|
dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
|
|
VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
|
|
- (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
|
|
dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
|
|
|
|
} else {
|
|
MyError = true;
|
|
dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
|
|
}
|
|
|
|
{
|
|
double prefetch_vm_bw;
|
|
double prefetch_row_bw;
|
|
|
|
if (PDEAndMetaPTEBytesFrame == 0) {
|
|
prefetch_vm_bw = 0;
|
|
} else if (*DestinationLinesToRequestVMInVBlank > 0) {
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
|
|
dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
|
|
dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
|
|
dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
|
|
#endif
|
|
prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
|
|
#endif
|
|
} else {
|
|
prefetch_vm_bw = 0;
|
|
MyError = true;
|
|
dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
|
|
}
|
|
|
|
if (MetaRowByte + PixelPTEBytesPerRow == 0) {
|
|
prefetch_row_bw = 0;
|
|
} else if (*DestinationLinesToRequestRowInVBlank > 0) {
|
|
prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
|
|
dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
|
|
dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
|
|
dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
|
|
#endif
|
|
} else {
|
|
prefetch_row_bw = 0;
|
|
MyError = true;
|
|
dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
|
|
}
|
|
|
|
*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
|
|
}
|
|
|
|
if (MyError) {
|
|
*PrefetchBandwidth = 0;
|
|
TimeForFetchingMetaPTE = 0;
|
|
TimeForFetchingRowInVBlank = 0;
|
|
*DestinationLinesToRequestVMInVBlank = 0;
|
|
*DestinationLinesToRequestRowInVBlank = 0;
|
|
*DestinationLinesForPrefetch = 0;
|
|
LinesToRequestPrefetchPixelData = 0;
|
|
*VRatioPrefetchY = 0;
|
|
*VRatioPrefetchC = 0;
|
|
*RequiredPrefetchPixDataBWLuma = 0;
|
|
*RequiredPrefetchPixDataBWChroma = 0;
|
|
}
|
|
|
|
return MyError;
|
|
}
|
|
|
|
/*
 * RoundToDFSGranularityUp - snap a clock to a VCO-derived step, rounding up.
 *
 * Computes (VCOSpeed * 4) / D, where D is (VCOSpeed * 4) / Clock passed
 * through dml_floor(..., 1).
 *
 * NOTE(review): presumably dml_floor(x, 1) rounds x down to a whole number,
 * which makes the returned clock greater than or equal to Clock - confirm
 * against the helper's definition.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
|
|
|
|
/*
 * RoundToDFSGranularityDown - snap a clock to a VCO-derived step, rounding
 * down.
 *
 * Computes (VCOSpeed * 4) / D, where D is (VCOSpeed * 4) / Clock passed
 * through dml_ceil(..., 1).
 *
 * NOTE(review): presumably dml_ceil(x, 1) rounds x up to a whole number,
 * which makes the returned clock less than or equal to Clock - confirm
 * against the helper's definition.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
|
|
|
|
static void CalculateDCCConfiguration(
|
|
bool DCCEnabled,
|
|
bool DCCProgrammingAssumesScanDirectionUnknown,
|
|
enum source_format_class SourcePixelFormat,
|
|
unsigned int SurfaceWidthLuma,
|
|
unsigned int SurfaceWidthChroma,
|
|
unsigned int SurfaceHeightLuma,
|
|
unsigned int SurfaceHeightChroma,
|
|
double DETBufferSize,
|
|
unsigned int RequestHeight256ByteLuma,
|
|
unsigned int RequestHeight256ByteChroma,
|
|
enum dm_swizzle_mode TilingFormat,
|
|
unsigned int BytePerPixelY,
|
|
unsigned int BytePerPixelC,
|
|
double BytePerPixelDETY,
|
|
double BytePerPixelDETC,
|
|
enum scan_direction_class ScanOrientation,
|
|
unsigned int *MaxUncompressedBlockLuma,
|
|
unsigned int *MaxUncompressedBlockChroma,
|
|
unsigned int *MaxCompressedBlockLuma,
|
|
unsigned int *MaxCompressedBlockChroma,
|
|
unsigned int *IndependentBlockLuma,
|
|
unsigned int *IndependentBlockChroma)
|
|
{
|
|
int yuv420;
|
|
int horz_div_l;
|
|
int horz_div_c;
|
|
int vert_div_l;
|
|
int vert_div_c;
|
|
|
|
int swath_buf_size;
|
|
double detile_buf_vp_horz_limit;
|
|
double detile_buf_vp_vert_limit;
|
|
|
|
int MAS_vp_horz_limit;
|
|
int MAS_vp_vert_limit;
|
|
int max_vp_horz_width;
|
|
int max_vp_vert_height;
|
|
int eff_surf_width_l;
|
|
int eff_surf_width_c;
|
|
int eff_surf_height_l;
|
|
int eff_surf_height_c;
|
|
|
|
int full_swath_bytes_horz_wc_l;
|
|
int full_swath_bytes_horz_wc_c;
|
|
int full_swath_bytes_vert_wc_l;
|
|
int full_swath_bytes_vert_wc_c;
|
|
int req128_horz_wc_l;
|
|
int req128_horz_wc_c;
|
|
int req128_vert_wc_l;
|
|
int req128_vert_wc_c;
|
|
int segment_order_horz_contiguous_luma;
|
|
int segment_order_horz_contiguous_chroma;
|
|
int segment_order_vert_contiguous_luma;
|
|
int segment_order_vert_contiguous_chroma;
|
|
|
|
typedef enum {
|
|
REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
|
|
} RequestType;
|
|
RequestType RequestLuma;
|
|
RequestType RequestChroma;
|
|
|
|
yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
|
|
horz_div_l = 1;
|
|
horz_div_c = 1;
|
|
vert_div_l = 1;
|
|
vert_div_c = 1;
|
|
|
|
if (BytePerPixelY == 1)
|
|
vert_div_l = 0;
|
|
if (BytePerPixelC == 1)
|
|
vert_div_c = 0;
|
|
if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
|
|
horz_div_l = 0;
|
|
if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
|
|
horz_div_c = 0;
|
|
|
|
if (BytePerPixelC == 0) {
|
|
swath_buf_size = DETBufferSize / 2 - 2 * 256;
|
|
detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
|
|
detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
|
|
} else {
|
|
swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
|
|
detile_buf_vp_horz_limit = (double) swath_buf_size
|
|
/ ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
|
|
+ (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
|
|
detile_buf_vp_vert_limit = (double) swath_buf_size
|
|
/ (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
|
|
}
|
|
|
|
if (SourcePixelFormat == dm_420_10) {
|
|
detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
|
|
detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
|
|
}
|
|
|
|
detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
|
|
detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
|
|
|
|
MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
|
|
MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
|
|
max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
|
|
max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
|
|
eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
|
|
eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
|
|
eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
|
|
eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
|
|
|
|
full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
|
|
full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
|
|
if (BytePerPixelC > 0) {
|
|
full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
|
|
full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
|
|
} else {
|
|
full_swath_bytes_horz_wc_c = 0;
|
|
full_swath_bytes_vert_wc_c = 0;
|
|
}
|
|
|
|
if (SourcePixelFormat == dm_420_10) {
|
|
full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
|
|
full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
|
|
full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
|
|
full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
|
|
}
|
|
|
|
if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
|
|
req128_horz_wc_l = 0;
|
|
req128_horz_wc_c = 0;
|
|
} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
|
|
req128_horz_wc_l = 0;
|
|
req128_horz_wc_c = 1;
|
|
} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
|
|
req128_horz_wc_l = 1;
|
|
req128_horz_wc_c = 0;
|
|
} else {
|
|
req128_horz_wc_l = 1;
|
|
req128_horz_wc_c = 1;
|
|
}
|
|
|
|
if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
|
|
req128_vert_wc_l = 0;
|
|
req128_vert_wc_c = 0;
|
|
} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
|
|
req128_vert_wc_l = 0;
|
|
req128_vert_wc_c = 1;
|
|
} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
|
|
req128_vert_wc_l = 1;
|
|
req128_vert_wc_c = 0;
|
|
} else {
|
|
req128_vert_wc_l = 1;
|
|
req128_vert_wc_c = 1;
|
|
}
|
|
|
|
if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
|
|
segment_order_horz_contiguous_luma = 0;
|
|
} else {
|
|
segment_order_horz_contiguous_luma = 1;
|
|
}
|
|
if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
|
|
|| (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
|
|
segment_order_vert_contiguous_luma = 0;
|
|
} else {
|
|
segment_order_vert_contiguous_luma = 1;
|
|
}
|
|
if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
|
|
segment_order_horz_contiguous_chroma = 0;
|
|
} else {
|
|
segment_order_horz_contiguous_chroma = 1;
|
|
}
|
|
if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
|
|
|| (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
|
|
segment_order_vert_contiguous_chroma = 0;
|
|
} else {
|
|
segment_order_vert_contiguous_chroma = 1;
|
|
}
|
|
|
|
if (DCCProgrammingAssumesScanDirectionUnknown == true) {
|
|
if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
|
|
RequestLuma = REQ_256Bytes;
|
|
} else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
|
|
RequestLuma = REQ_128BytesNonContiguous;
|
|
} else {
|
|
RequestLuma = REQ_128BytesContiguous;
|
|
}
|
|
if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
|
|
RequestChroma = REQ_256Bytes;
|
|
} else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
|
|
RequestChroma = REQ_128BytesNonContiguous;
|
|
} else {
|
|
RequestChroma = REQ_128BytesContiguous;
|
|
}
|
|
} else if (ScanOrientation != dm_vert) {
|
|
if (req128_horz_wc_l == 0) {
|
|
RequestLuma = REQ_256Bytes;
|
|
} else if (segment_order_horz_contiguous_luma == 0) {
|
|
RequestLuma = REQ_128BytesNonContiguous;
|
|
} else {
|
|
RequestLuma = REQ_128BytesContiguous;
|
|
}
|
|
if (req128_horz_wc_c == 0) {
|
|
RequestChroma = REQ_256Bytes;
|
|
} else if (segment_order_horz_contiguous_chroma == 0) {
|
|
RequestChroma = REQ_128BytesNonContiguous;
|
|
} else {
|
|
RequestChroma = REQ_128BytesContiguous;
|
|
}
|
|
} else {
|
|
if (req128_vert_wc_l == 0) {
|
|
RequestLuma = REQ_256Bytes;
|
|
} else if (segment_order_vert_contiguous_luma == 0) {
|
|
RequestLuma = REQ_128BytesNonContiguous;
|
|
} else {
|
|
RequestLuma = REQ_128BytesContiguous;
|
|
}
|
|
if (req128_vert_wc_c == 0) {
|
|
RequestChroma = REQ_256Bytes;
|
|
} else if (segment_order_vert_contiguous_chroma == 0) {
|
|
RequestChroma = REQ_128BytesNonContiguous;
|
|
} else {
|
|
RequestChroma = REQ_128BytesContiguous;
|
|
}
|
|
}
|
|
|
|
if (RequestLuma == REQ_256Bytes) {
|
|
*MaxUncompressedBlockLuma = 256;
|
|
*MaxCompressedBlockLuma = 256;
|
|
*IndependentBlockLuma = 0;
|
|
} else if (RequestLuma == REQ_128BytesContiguous) {
|
|
*MaxUncompressedBlockLuma = 256;
|
|
*MaxCompressedBlockLuma = 128;
|
|
*IndependentBlockLuma = 128;
|
|
} else {
|
|
*MaxUncompressedBlockLuma = 256;
|
|
*MaxCompressedBlockLuma = 64;
|
|
*IndependentBlockLuma = 64;
|
|
}
|
|
|
|
if (RequestChroma == REQ_256Bytes) {
|
|
*MaxUncompressedBlockChroma = 256;
|
|
*MaxCompressedBlockChroma = 256;
|
|
*IndependentBlockChroma = 0;
|
|
} else if (RequestChroma == REQ_128BytesContiguous) {
|
|
*MaxUncompressedBlockChroma = 256;
|
|
*MaxCompressedBlockChroma = 128;
|
|
*IndependentBlockChroma = 128;
|
|
} else {
|
|
*MaxUncompressedBlockChroma = 256;
|
|
*MaxCompressedBlockChroma = 64;
|
|
*IndependentBlockChroma = 64;
|
|
}
|
|
|
|
if (DCCEnabled != true || BytePerPixelC == 0) {
|
|
*MaxUncompressedBlockChroma = 0;
|
|
*MaxCompressedBlockChroma = 0;
|
|
*IndependentBlockChroma = 0;
|
|
}
|
|
|
|
if (DCCEnabled != true) {
|
|
*MaxUncompressedBlockLuma = 0;
|
|
*MaxCompressedBlockLuma = 0;
|
|
*IndependentBlockLuma = 0;
|
|
}
|
|
}
|
|
|
|
static double CalculatePrefetchSourceLines(
|
|
struct display_mode_lib *mode_lib,
|
|
double VRatio,
|
|
double vtaps,
|
|
bool Interlace,
|
|
bool ProgressiveToInterlaceUnitInOPP,
|
|
unsigned int SwathHeight,
|
|
unsigned int ViewportYStart,
|
|
double *VInitPreFill,
|
|
unsigned int *MaxNumSwath)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
unsigned int MaxPartialSwath;
|
|
|
|
if (ProgressiveToInterlaceUnitInOPP)
|
|
*VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
|
|
else
|
|
*VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
|
|
|
|
if (!v->IgnoreViewportPositioning) {
|
|
|
|
*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
|
|
|
|
if (*VInitPreFill > 1.0)
|
|
MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
|
|
else
|
|
MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
|
|
MaxPartialSwath = dml_max(1U, MaxPartialSwath);
|
|
|
|
} else {
|
|
|
|
if (ViewportYStart != 0)
|
|
dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
|
|
|
|
*MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
|
|
|
|
if (*VInitPreFill > 1.0)
|
|
MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
|
|
else
|
|
MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
|
|
dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
|
|
dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
|
|
dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
|
|
dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
|
|
dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
|
|
dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
|
|
dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
|
|
dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
|
|
#endif
|
|
return *MaxNumSwath * SwathHeight + MaxPartialSwath;
|
|
}
|
|
|
|
static unsigned int CalculateVMAndRowBytes(
|
|
struct display_mode_lib *mode_lib,
|
|
bool DCCEnable,
|
|
unsigned int BlockHeight256Bytes,
|
|
unsigned int BlockWidth256Bytes,
|
|
enum source_format_class SourcePixelFormat,
|
|
unsigned int SurfaceTiling,
|
|
unsigned int BytePerPixel,
|
|
enum scan_direction_class ScanDirection,
|
|
unsigned int SwathWidth,
|
|
unsigned int ViewportHeight,
|
|
bool GPUVMEnable,
|
|
bool HostVMEnable,
|
|
unsigned int HostVMMaxNonCachedPageTableLevels,
|
|
unsigned int GPUVMMinPageSize,
|
|
unsigned int HostVMMinPageSize,
|
|
unsigned int PTEBufferSizeInRequests,
|
|
unsigned int Pitch,
|
|
unsigned int DCCMetaPitch,
|
|
unsigned int *MacroTileWidth,
|
|
unsigned int *MetaRowByte,
|
|
unsigned int *PixelPTEBytesPerRow,
|
|
bool *PTEBufferSizeNotExceeded,
|
|
int *dpte_row_width_ub,
|
|
unsigned int *dpte_row_height,
|
|
unsigned int *MetaRequestWidth,
|
|
unsigned int *MetaRequestHeight,
|
|
unsigned int *meta_row_width,
|
|
unsigned int *meta_row_height,
|
|
int *vm_group_bytes,
|
|
unsigned int *dpte_group_bytes,
|
|
unsigned int *PixelPTEReqWidth,
|
|
unsigned int *PixelPTEReqHeight,
|
|
unsigned int *PTERequestSize,
|
|
int *DPDE0BytesFrame,
|
|
int *MetaPTEBytesFrame)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
unsigned int MPDEBytesFrame;
|
|
unsigned int DCCMetaSurfaceBytes;
|
|
unsigned int MacroTileSizeBytes;
|
|
unsigned int MacroTileHeight;
|
|
unsigned int ExtraDPDEBytesFrame;
|
|
unsigned int PDEAndMetaPTEBytesFrame;
|
|
unsigned int PixelPTEReqHeightPTEs = 0;
|
|
unsigned int HostVMDynamicLevels = 0;
|
|
double FractionOfPTEReturnDrop;
|
|
|
|
if (GPUVMEnable == true && HostVMEnable == true) {
|
|
if (HostVMMinPageSize < 2048) {
|
|
HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
|
|
} else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
|
|
HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
|
|
} else {
|
|
HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
|
|
}
|
|
}
|
|
|
|
*MetaRequestHeight = 8 * BlockHeight256Bytes;
|
|
*MetaRequestWidth = 8 * BlockWidth256Bytes;
|
|
if (ScanDirection != dm_vert) {
|
|
*meta_row_height = *MetaRequestHeight;
|
|
*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
|
|
*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
|
|
} else {
|
|
*meta_row_height = *MetaRequestWidth;
|
|
*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
|
|
*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
|
|
}
|
|
DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
|
|
if (GPUVMEnable == true) {
|
|
*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
|
|
MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
|
|
} else {
|
|
*MetaPTEBytesFrame = 0;
|
|
MPDEBytesFrame = 0;
|
|
}
|
|
|
|
if (DCCEnable != true) {
|
|
*MetaPTEBytesFrame = 0;
|
|
MPDEBytesFrame = 0;
|
|
*MetaRowByte = 0;
|
|
}
|
|
|
|
if (SurfaceTiling == dm_sw_linear) {
|
|
MacroTileSizeBytes = 256;
|
|
MacroTileHeight = BlockHeight256Bytes;
|
|
} else {
|
|
MacroTileSizeBytes = 65536;
|
|
MacroTileHeight = 16 * BlockHeight256Bytes;
|
|
}
|
|
*MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
|
|
|
|
if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
|
|
if (ScanDirection != dm_vert) {
|
|
*DPDE0BytesFrame = 64
|
|
* (dml_ceil(
|
|
((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
|
|
/ (8 * 2097152),
|
|
1) + 1);
|
|
} else {
|
|
*DPDE0BytesFrame = 64
|
|
* (dml_ceil(
|
|
((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
|
|
/ (8 * 2097152),
|
|
1) + 1);
|
|
}
|
|
ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
|
|
} else {
|
|
*DPDE0BytesFrame = 0;
|
|
ExtraDPDEBytesFrame = 0;
|
|
}
|
|
|
|
PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
|
|
dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
|
|
dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
|
|
dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
|
|
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
|
|
#endif
|
|
|
|
if (HostVMEnable == true) {
|
|
PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
|
|
}
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
|
|
#endif
|
|
|
|
if (SurfaceTiling == dm_sw_linear) {
|
|
PixelPTEReqHeightPTEs = 1;
|
|
*PixelPTEReqHeight = 1;
|
|
*PixelPTEReqWidth = 32768.0 / BytePerPixel;
|
|
*PTERequestSize = 64;
|
|
FractionOfPTEReturnDrop = 0;
|
|
} else if (MacroTileSizeBytes == 4096) {
|
|
PixelPTEReqHeightPTEs = 1;
|
|
*PixelPTEReqHeight = MacroTileHeight;
|
|
*PixelPTEReqWidth = 8 * *MacroTileWidth;
|
|
*PTERequestSize = 64;
|
|
if (ScanDirection != dm_vert)
|
|
FractionOfPTEReturnDrop = 0;
|
|
else
|
|
FractionOfPTEReturnDrop = 7 / 8;
|
|
} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
|
|
PixelPTEReqHeightPTEs = 16;
|
|
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
|
|
*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
|
|
*PTERequestSize = 128;
|
|
FractionOfPTEReturnDrop = 0;
|
|
} else {
|
|
PixelPTEReqHeightPTEs = 1;
|
|
*PixelPTEReqHeight = MacroTileHeight;
|
|
*PixelPTEReqWidth = 8 * *MacroTileWidth;
|
|
*PTERequestSize = 64;
|
|
FractionOfPTEReturnDrop = 0;
|
|
}
|
|
|
|
if (SurfaceTiling == dm_sw_linear) {
|
|
*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
|
|
*dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
|
|
*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
|
|
} else if (ScanDirection != dm_vert) {
|
|
*dpte_row_height = *PixelPTEReqHeight;
|
|
*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
|
|
*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
|
|
} else {
|
|
*dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
|
|
*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
|
|
*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
|
|
}
|
|
|
|
if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
|
|
*PTEBufferSizeNotExceeded = true;
|
|
} else {
|
|
*PTEBufferSizeNotExceeded = false;
|
|
}
|
|
|
|
if (GPUVMEnable != true) {
|
|
*PixelPTEBytesPerRow = 0;
|
|
*PTEBufferSizeNotExceeded = true;
|
|
}
|
|
|
|
dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
|
|
|
|
if (HostVMEnable == true) {
|
|
*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
|
|
}
|
|
|
|
if (HostVMEnable == true) {
|
|
*vm_group_bytes = 512;
|
|
*dpte_group_bytes = 512;
|
|
} else if (GPUVMEnable == true) {
|
|
*vm_group_bytes = 2048;
|
|
if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
|
|
*dpte_group_bytes = 512;
|
|
} else {
|
|
*dpte_group_bytes = 2048;
|
|
}
|
|
} else {
|
|
*vm_group_bytes = 0;
|
|
*dpte_group_bytes = 0;
|
|
}
|
|
return PDEAndMetaPTEBytesFrame;
|
|
}
|
|
|
|
static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
unsigned int j, k;
|
|
double HostVMInefficiencyFactor = 1.0;
|
|
bool NoChromaPlanes = true;
|
|
int ReorderBytes;
|
|
double VMDataOnlyReturnBW;
|
|
double MaxTotalRDBandwidth = 0;
|
|
int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
|
|
|
|
v->WritebackDISPCLK = 0.0;
|
|
v->DISPCLKWithRamping = 0;
|
|
v->DISPCLKWithoutRamping = 0;
|
|
v->GlobalDPPCLK = 0.0;
|
|
/* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
|
|
{
|
|
double IdealFabricAndSDPPortBandwidthPerState = dml_min(
|
|
v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
|
|
v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
|
|
double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
|
|
if (v->HostVMEnable != true) {
|
|
v->ReturnBW = dml_min(
|
|
IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
|
|
IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
|
|
} else {
|
|
v->ReturnBW = dml_min(
|
|
IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
|
|
IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
|
|
}
|
|
}
|
|
/* End DAL custom code */
|
|
|
|
// DISPCLK and DPPCLK Calculation
|
|
//
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->WritebackEnable[k]) {
|
|
v->WritebackDISPCLK = dml_max(
|
|
v->WritebackDISPCLK,
|
|
dml31_CalculateWriteBackDISPCLK(
|
|
v->WritebackPixelFormat[k],
|
|
v->PixelClock[k],
|
|
v->WritebackHRatio[k],
|
|
v->WritebackVRatio[k],
|
|
v->WritebackHTaps[k],
|
|
v->WritebackVTaps[k],
|
|
v->WritebackSourceWidth[k],
|
|
v->WritebackDestinationWidth[k],
|
|
v->HTotal[k],
|
|
v->WritebackLineBufferSize));
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->HRatio[k] > 1) {
|
|
v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
|
|
v->MaxDCHUBToPSCLThroughput,
|
|
v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
|
|
} else {
|
|
v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
|
|
}
|
|
|
|
v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
|
|
* dml_max(
|
|
v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
|
|
dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
|
|
|
|
if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
|
|
v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
|
|
}
|
|
|
|
if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
|
|
&& v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
|
|
v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
|
|
v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
|
|
} else {
|
|
if (v->HRatioChroma[k] > 1) {
|
|
v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
|
|
v->MaxDCHUBToPSCLThroughput,
|
|
v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
|
|
} else {
|
|
v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
|
|
}
|
|
v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
|
|
* dml_max3(
|
|
v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
|
|
v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
|
|
1.0);
|
|
|
|
if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
|
|
v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
|
|
}
|
|
|
|
v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->BlendingAndTiming[k] != k)
|
|
continue;
|
|
if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
|
|
v->DISPCLKWithRamping = dml_max(
|
|
v->DISPCLKWithRamping,
|
|
v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
|
|
* (1 + v->DISPCLKRampingMargin / 100));
|
|
v->DISPCLKWithoutRamping = dml_max(
|
|
v->DISPCLKWithoutRamping,
|
|
v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
|
|
} else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
|
|
v->DISPCLKWithRamping = dml_max(
|
|
v->DISPCLKWithRamping,
|
|
v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
|
|
* (1 + v->DISPCLKRampingMargin / 100));
|
|
v->DISPCLKWithoutRamping = dml_max(
|
|
v->DISPCLKWithoutRamping,
|
|
v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
|
|
} else {
|
|
v->DISPCLKWithRamping = dml_max(
|
|
v->DISPCLKWithRamping,
|
|
v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
|
|
v->DISPCLKWithoutRamping = dml_max(
|
|
v->DISPCLKWithoutRamping,
|
|
v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
|
|
}
|
|
}
|
|
|
|
v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
|
|
v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
|
|
|
|
ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
|
|
v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
|
|
v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
|
|
v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
|
|
v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
|
|
v->DISPCLKDPPCLKVCOSpeed);
|
|
if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
|
|
v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
|
|
} else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
|
|
v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
|
|
} else {
|
|
v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
|
|
}
|
|
v->DISPCLK = v->DISPCLK_calculated;
|
|
DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
|
|
v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
|
|
}
|
|
v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
|
|
DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->DPPCLK[k] = v->DPPCLK_calculated[k];
|
|
}
|
|
|
|
// Urgent and B P-State/DRAM Clock Change Watermark
|
|
DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
|
|
DTRACE(" return_bus_bw = %f", v->ReturnBW);
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
dml30_CalculateBytePerPixelAnd256BBlockSizes(
|
|
v->SourcePixelFormat[k],
|
|
v->SurfaceTiling[k],
|
|
&v->BytePerPixelY[k],
|
|
&v->BytePerPixelC[k],
|
|
&v->BytePerPixelDETY[k],
|
|
&v->BytePerPixelDETC[k],
|
|
&v->BlockHeight256BytesY[k],
|
|
&v->BlockHeight256BytesC[k],
|
|
&v->BlockWidth256BytesY[k],
|
|
&v->BlockWidth256BytesC[k]);
|
|
}
|
|
|
|
CalculateSwathWidth(
|
|
false,
|
|
v->NumberOfActivePlanes,
|
|
v->SourcePixelFormat,
|
|
v->SourceScan,
|
|
v->ViewportWidth,
|
|
v->ViewportHeight,
|
|
v->SurfaceWidthY,
|
|
v->SurfaceWidthC,
|
|
v->SurfaceHeightY,
|
|
v->SurfaceHeightC,
|
|
v->ODMCombineEnabled,
|
|
v->BytePerPixelY,
|
|
v->BytePerPixelC,
|
|
v->BlockHeight256BytesY,
|
|
v->BlockHeight256BytesC,
|
|
v->BlockWidth256BytesY,
|
|
v->BlockWidth256BytesC,
|
|
v->BlendingAndTiming,
|
|
v->HActive,
|
|
v->HRatio,
|
|
v->DPPPerPlane,
|
|
v->SwathWidthSingleDPPY,
|
|
v->SwathWidthSingleDPPC,
|
|
v->SwathWidthY,
|
|
v->SwathWidthC,
|
|
v->dummyinteger3,
|
|
v->dummyinteger4,
|
|
v->swath_width_luma_ub,
|
|
v->swath_width_chroma_ub);
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
|
|
* v->VRatio[k];
|
|
v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
|
|
* v->VRatioChroma[k];
|
|
DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
|
|
}
|
|
|
|
// DCFCLK Deep Sleep
|
|
CalculateDCFCLKDeepSleep(
|
|
mode_lib,
|
|
v->NumberOfActivePlanes,
|
|
v->BytePerPixelY,
|
|
v->BytePerPixelC,
|
|
v->VRatio,
|
|
v->VRatioChroma,
|
|
v->SwathWidthY,
|
|
v->SwathWidthC,
|
|
v->DPPPerPlane,
|
|
v->HRatio,
|
|
v->HRatioChroma,
|
|
v->PixelClock,
|
|
v->PSCL_THROUGHPUT_LUMA,
|
|
v->PSCL_THROUGHPUT_CHROMA,
|
|
v->DPPCLK,
|
|
v->ReadBandwidthPlaneLuma,
|
|
v->ReadBandwidthPlaneChroma,
|
|
v->ReturnBusWidth,
|
|
&v->DCFCLKDeepSleep);
|
|
|
|
// DSCCLK
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
|
|
v->DSCCLK_calculated[k] = 0.0;
|
|
} else {
|
|
if (v->OutputFormat[k] == dm_420)
|
|
v->DSCFormatFactor = 2;
|
|
else if (v->OutputFormat[k] == dm_444)
|
|
v->DSCFormatFactor = 1;
|
|
else if (v->OutputFormat[k] == dm_n422)
|
|
v->DSCFormatFactor = 2;
|
|
else
|
|
v->DSCFormatFactor = 1;
|
|
if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
|
|
v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
|
|
/ (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
|
|
else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
|
|
v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
|
|
/ (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
|
|
else
|
|
v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
|
|
/ (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
|
|
}
|
|
}
|
|
|
|
// DSC Delay
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
double BPP = v->OutputBpp[k];
|
|
|
|
if (v->DSCEnabled[k] && BPP != 0) {
|
|
if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
|
|
v->DSCDelay[k] = dscceComputeDelay(
|
|
v->DSCInputBitPerComponent[k],
|
|
BPP,
|
|
dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
|
|
v->NumberOfDSCSlices[k],
|
|
v->OutputFormat[k],
|
|
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
|
|
} else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
|
|
v->DSCDelay[k] = 2
|
|
* (dscceComputeDelay(
|
|
v->DSCInputBitPerComponent[k],
|
|
BPP,
|
|
dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
|
|
v->NumberOfDSCSlices[k] / 2.0,
|
|
v->OutputFormat[k],
|
|
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
|
|
} else {
|
|
v->DSCDelay[k] = 4
|
|
* (dscceComputeDelay(
|
|
v->DSCInputBitPerComponent[k],
|
|
BPP,
|
|
dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
|
|
v->NumberOfDSCSlices[k] / 4.0,
|
|
v->OutputFormat[k],
|
|
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
|
|
}
|
|
v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
|
|
} else {
|
|
v->DSCDelay[k] = 0;
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k)
|
|
for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
|
|
if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
|
|
v->DSCDelay[k] = v->DSCDelay[j];
|
|
|
|
// Prefetch
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
unsigned int PDEAndMetaPTEBytesFrameY;
|
|
unsigned int PixelPTEBytesPerRowY;
|
|
unsigned int MetaRowByteY;
|
|
unsigned int MetaRowByteC;
|
|
unsigned int PDEAndMetaPTEBytesFrameC;
|
|
unsigned int PixelPTEBytesPerRowC;
|
|
bool PTEBufferSizeNotExceededY;
|
|
bool PTEBufferSizeNotExceededC;
|
|
|
|
if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
|
|
|| v->SourcePixelFormat[k] == dm_rgbe_alpha) {
|
|
if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
|
|
v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
|
|
v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
|
|
} else {
|
|
v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
|
|
v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
|
|
}
|
|
|
|
PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
|
|
mode_lib,
|
|
v->DCCEnable[k],
|
|
v->BlockHeight256BytesC[k],
|
|
v->BlockWidth256BytesC[k],
|
|
v->SourcePixelFormat[k],
|
|
v->SurfaceTiling[k],
|
|
v->BytePerPixelC[k],
|
|
v->SourceScan[k],
|
|
v->SwathWidthC[k],
|
|
v->ViewportHeightChroma[k],
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->HostVMMaxNonCachedPageTableLevels,
|
|
v->GPUVMMinPageSize,
|
|
v->HostVMMinPageSize,
|
|
v->PTEBufferSizeInRequestsForChroma,
|
|
v->PitchC[k],
|
|
v->DCCMetaPitchC[k],
|
|
&v->MacroTileWidthC[k],
|
|
&MetaRowByteC,
|
|
&PixelPTEBytesPerRowC,
|
|
&PTEBufferSizeNotExceededC,
|
|
&v->dpte_row_width_chroma_ub[k],
|
|
&v->dpte_row_height_chroma[k],
|
|
&v->meta_req_width_chroma[k],
|
|
&v->meta_req_height_chroma[k],
|
|
&v->meta_row_width_chroma[k],
|
|
&v->meta_row_height_chroma[k],
|
|
&v->dummyinteger1,
|
|
&v->dummyinteger2,
|
|
&v->PixelPTEReqWidthC[k],
|
|
&v->PixelPTEReqHeightC[k],
|
|
&v->PTERequestSizeC[k],
|
|
&v->dpde0_bytes_per_frame_ub_c[k],
|
|
&v->meta_pte_bytes_per_frame_ub_c[k]);
|
|
|
|
v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
|
|
mode_lib,
|
|
v->VRatioChroma[k],
|
|
v->VTAPsChroma[k],
|
|
v->Interlace[k],
|
|
v->ProgressiveToInterlaceUnitInOPP,
|
|
v->SwathHeightC[k],
|
|
v->ViewportYStartC[k],
|
|
&v->VInitPreFillC[k],
|
|
&v->MaxNumSwathC[k]);
|
|
} else {
|
|
v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
|
|
v->PTEBufferSizeInRequestsForChroma = 0;
|
|
PixelPTEBytesPerRowC = 0;
|
|
PDEAndMetaPTEBytesFrameC = 0;
|
|
MetaRowByteC = 0;
|
|
v->MaxNumSwathC[k] = 0;
|
|
v->PrefetchSourceLinesC[k] = 0;
|
|
}
|
|
|
|
PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
|
|
mode_lib,
|
|
v->DCCEnable[k],
|
|
v->BlockHeight256BytesY[k],
|
|
v->BlockWidth256BytesY[k],
|
|
v->SourcePixelFormat[k],
|
|
v->SurfaceTiling[k],
|
|
v->BytePerPixelY[k],
|
|
v->SourceScan[k],
|
|
v->SwathWidthY[k],
|
|
v->ViewportHeight[k],
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->HostVMMaxNonCachedPageTableLevels,
|
|
v->GPUVMMinPageSize,
|
|
v->HostVMMinPageSize,
|
|
v->PTEBufferSizeInRequestsForLuma,
|
|
v->PitchY[k],
|
|
v->DCCMetaPitchY[k],
|
|
&v->MacroTileWidthY[k],
|
|
&MetaRowByteY,
|
|
&PixelPTEBytesPerRowY,
|
|
&PTEBufferSizeNotExceededY,
|
|
&v->dpte_row_width_luma_ub[k],
|
|
&v->dpte_row_height[k],
|
|
&v->meta_req_width[k],
|
|
&v->meta_req_height[k],
|
|
&v->meta_row_width[k],
|
|
&v->meta_row_height[k],
|
|
&v->vm_group_bytes[k],
|
|
&v->dpte_group_bytes[k],
|
|
&v->PixelPTEReqWidthY[k],
|
|
&v->PixelPTEReqHeightY[k],
|
|
&v->PTERequestSizeY[k],
|
|
&v->dpde0_bytes_per_frame_ub_l[k],
|
|
&v->meta_pte_bytes_per_frame_ub_l[k]);
|
|
|
|
v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
|
|
mode_lib,
|
|
v->VRatio[k],
|
|
v->vtaps[k],
|
|
v->Interlace[k],
|
|
v->ProgressiveToInterlaceUnitInOPP,
|
|
v->SwathHeightY[k],
|
|
v->ViewportYStartY[k],
|
|
&v->VInitPreFillY[k],
|
|
&v->MaxNumSwathY[k]);
|
|
v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
|
|
v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
|
|
v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
|
|
|
|
CalculateRowBandwidth(
|
|
v->GPUVMEnable,
|
|
v->SourcePixelFormat[k],
|
|
v->VRatio[k],
|
|
v->VRatioChroma[k],
|
|
v->DCCEnable[k],
|
|
v->HTotal[k] / v->PixelClock[k],
|
|
MetaRowByteY,
|
|
MetaRowByteC,
|
|
v->meta_row_height[k],
|
|
v->meta_row_height_chroma[k],
|
|
PixelPTEBytesPerRowY,
|
|
PixelPTEBytesPerRowC,
|
|
v->dpte_row_height[k],
|
|
v->dpte_row_height_chroma[k],
|
|
&v->meta_row_bw[k],
|
|
&v->dpte_row_bw[k]);
|
|
}
|
|
|
|
v->TotalDCCActiveDPP = 0;
|
|
v->TotalActiveDPP = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
|
|
if (v->DCCEnable[k])
|
|
v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
|
|
if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
|
|
|| v->SourcePixelFormat[k] == dm_rgbe_alpha)
|
|
NoChromaPlanes = false;
|
|
}
|
|
|
|
ReorderBytes = v->NumberOfChannels
|
|
* dml_max3(
|
|
v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
|
|
v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
|
|
v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
|
|
|
|
VMDataOnlyReturnBW = dml_min(
|
|
dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
|
|
* v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
|
|
v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
|
|
* v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
|
|
dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
|
|
dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
|
|
dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
|
|
dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
|
|
dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
|
|
dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
|
|
dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
|
|
dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
|
|
dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
|
|
dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
|
|
#endif
|
|
|
|
if (v->GPUVMEnable && v->HostVMEnable)
|
|
HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
|
|
|
|
v->UrgentExtraLatency = CalculateExtraLatency(
|
|
v->RoundTripPingLatencyCycles,
|
|
ReorderBytes,
|
|
v->DCFCLK,
|
|
v->TotalActiveDPP,
|
|
v->PixelChunkSizeInKByte,
|
|
v->TotalDCCActiveDPP,
|
|
v->MetaChunkSize,
|
|
v->ReturnBW,
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->NumberOfActivePlanes,
|
|
v->DPPPerPlane,
|
|
v->dpte_group_bytes,
|
|
HostVMInefficiencyFactor,
|
|
v->HostVMMinPageSize,
|
|
v->HostVMMaxNonCachedPageTableLevels);
|
|
|
|
v->TCalc = 24.0 / v->DCFCLKDeepSleep;
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->BlendingAndTiming[k] == k) {
|
|
if (v->WritebackEnable[k] == true) {
|
|
v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
|
|
+ CalculateWriteBackDelay(
|
|
v->WritebackPixelFormat[k],
|
|
v->WritebackHRatio[k],
|
|
v->WritebackVRatio[k],
|
|
v->WritebackVTaps[k],
|
|
v->WritebackDestinationWidth[k],
|
|
v->WritebackDestinationHeight[k],
|
|
v->WritebackSourceHeight[k],
|
|
v->HTotal[k]) / v->DISPCLK;
|
|
} else
|
|
v->WritebackDelay[v->VoltageLevel][k] = 0;
|
|
for (j = 0; j < v->NumberOfActivePlanes; ++j) {
|
|
if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
|
|
v->WritebackDelay[v->VoltageLevel][k] = dml_max(
|
|
v->WritebackDelay[v->VoltageLevel][k],
|
|
v->WritebackLatency
|
|
+ CalculateWriteBackDelay(
|
|
v->WritebackPixelFormat[j],
|
|
v->WritebackHRatio[j],
|
|
v->WritebackVRatio[j],
|
|
v->WritebackVTaps[j],
|
|
v->WritebackDestinationWidth[j],
|
|
v->WritebackDestinationHeight[j],
|
|
v->WritebackSourceHeight[j],
|
|
v->HTotal[k]) / v->DISPCLK);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k)
|
|
for (j = 0; j < v->NumberOfActivePlanes; ++j)
|
|
if (v->BlendingAndTiming[k] == j)
|
|
v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->MaxVStartupLines[k] =
|
|
(v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
|
|
dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
|
|
v->VTotal[k] - v->VActive[k]
|
|
- dml_max(
|
|
1.0,
|
|
dml_ceil(
|
|
(double) v->WritebackDelay[v->VoltageLevel][k]
|
|
/ (v->HTotal[k] / v->PixelClock[k]),
|
|
1));
|
|
if (v->MaxVStartupLines[k] > 1023)
|
|
v->MaxVStartupLines[k] = 1023;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
|
|
dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
|
|
dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
|
|
#endif
|
|
}
|
|
|
|
v->MaximumMaxVStartupLines = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k)
|
|
v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
|
|
|
|
// VBA_DELTA
|
|
// We don't really care to iterate between the various prefetch modes
|
|
//v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
|
|
|
|
v->UrgentLatency = CalculateUrgentLatency(
|
|
v->UrgentLatencyPixelDataOnly,
|
|
v->UrgentLatencyPixelMixedWithVMData,
|
|
v->UrgentLatencyVMDataOnly,
|
|
v->DoUrgentLatencyAdjustment,
|
|
v->UrgentLatencyAdjustmentFabricClockComponent,
|
|
v->UrgentLatencyAdjustmentFabricClockReference,
|
|
v->FabricClock);
|
|
|
|
v->FractionOfUrgentBandwidth = 0.0;
|
|
v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
|
|
|
|
v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
|
|
|
|
do {
|
|
double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
|
|
bool DestinationLineTimesForPrefetchLessThan2 = false;
|
|
bool VRatioPrefetchMoreThan4 = false;
|
|
double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
|
|
MaxTotalRDBandwidth = 0;
|
|
|
|
dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
Pipe myPipe;
|
|
|
|
myPipe.DPPCLK = v->DPPCLK[k];
|
|
myPipe.DISPCLK = v->DISPCLK;
|
|
myPipe.PixelClock = v->PixelClock[k];
|
|
myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
|
|
myPipe.DPPPerPlane = v->DPPPerPlane[k];
|
|
myPipe.ScalerEnabled = v->ScalerEnabled[k];
|
|
myPipe.VRatio = v->VRatio[k];
|
|
myPipe.VRatioChroma = v->VRatioChroma[k];
|
|
myPipe.SourceScan = v->SourceScan[k];
|
|
myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
|
|
myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
|
|
myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
|
|
myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
|
|
myPipe.InterlaceEnable = v->Interlace[k];
|
|
myPipe.NumberOfCursors = v->NumberOfCursors[k];
|
|
myPipe.VBlank = v->VTotal[k] - v->VActive[k];
|
|
myPipe.HTotal = v->HTotal[k];
|
|
myPipe.DCCEnable = v->DCCEnable[k];
|
|
myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
|
|
|| v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
|
|
myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
|
|
myPipe.BytePerPixelY = v->BytePerPixelY[k];
|
|
myPipe.BytePerPixelC = v->BytePerPixelC[k];
|
|
myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
|
|
v->ErrorResult[k] = CalculatePrefetchSchedule(
|
|
mode_lib,
|
|
HostVMInefficiencyFactor,
|
|
&myPipe,
|
|
v->DSCDelay[k],
|
|
v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
|
|
v->DPPCLKDelaySCL,
|
|
v->DPPCLKDelaySCLLBOnly,
|
|
v->DPPCLKDelayCNVCCursor,
|
|
v->DISPCLKDelaySubtotal,
|
|
(unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
|
|
v->OutputFormat[k],
|
|
v->MaxInterDCNTileRepeaters,
|
|
dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
|
|
v->MaxVStartupLines[k],
|
|
v->GPUVMMaxPageTableLevels,
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->HostVMMaxNonCachedPageTableLevels,
|
|
v->HostVMMinPageSize,
|
|
v->DynamicMetadataEnable[k],
|
|
v->DynamicMetadataVMEnabled,
|
|
v->DynamicMetadataLinesBeforeActiveRequired[k],
|
|
v->DynamicMetadataTransmittedBytes[k],
|
|
v->UrgentLatency,
|
|
v->UrgentExtraLatency,
|
|
v->TCalc,
|
|
v->PDEAndMetaPTEBytesFrame[k],
|
|
v->MetaRowByte[k],
|
|
v->PixelPTEBytesPerRow[k],
|
|
v->PrefetchSourceLinesY[k],
|
|
v->SwathWidthY[k],
|
|
v->VInitPreFillY[k],
|
|
v->MaxNumSwathY[k],
|
|
v->PrefetchSourceLinesC[k],
|
|
v->SwathWidthC[k],
|
|
v->VInitPreFillC[k],
|
|
v->MaxNumSwathC[k],
|
|
v->swath_width_luma_ub[k],
|
|
v->swath_width_chroma_ub[k],
|
|
v->SwathHeightY[k],
|
|
v->SwathHeightC[k],
|
|
TWait,
|
|
&v->DSTXAfterScaler[k],
|
|
&v->DSTYAfterScaler[k],
|
|
&v->DestinationLinesForPrefetch[k],
|
|
&v->PrefetchBandwidth[k],
|
|
&v->DestinationLinesToRequestVMInVBlank[k],
|
|
&v->DestinationLinesToRequestRowInVBlank[k],
|
|
&v->VRatioPrefetchY[k],
|
|
&v->VRatioPrefetchC[k],
|
|
&v->RequiredPrefetchPixDataBWLuma[k],
|
|
&v->RequiredPrefetchPixDataBWChroma[k],
|
|
&v->NotEnoughTimeForDynamicMetadata[k],
|
|
&v->Tno_bw[k],
|
|
&v->prefetch_vmrow_bw[k],
|
|
&v->Tdmdl_vm[k],
|
|
&v->Tdmdl[k],
|
|
&v->TSetup[k],
|
|
&v->VUpdateOffsetPix[k],
|
|
&v->VUpdateWidthPix[k],
|
|
&v->VReadyOffsetPix[k]);
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
|
|
#endif
|
|
v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
|
|
}
|
|
|
|
v->NoEnoughUrgentLatencyHiding = false;
|
|
v->NoEnoughUrgentLatencyHidingPre = false;
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
|
|
/ (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
|
|
v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
|
|
/ (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
|
|
|
|
CalculateUrgentBurstFactor(
|
|
v->swath_width_luma_ub[k],
|
|
v->swath_width_chroma_ub[k],
|
|
v->SwathHeightY[k],
|
|
v->SwathHeightC[k],
|
|
v->HTotal[k] / v->PixelClock[k],
|
|
v->UrgentLatency,
|
|
v->CursorBufferSize,
|
|
v->CursorWidth[k][0],
|
|
v->CursorBPP[k][0],
|
|
v->VRatio[k],
|
|
v->VRatioChroma[k],
|
|
v->BytePerPixelDETY[k],
|
|
v->BytePerPixelDETC[k],
|
|
v->DETBufferSizeY[k],
|
|
v->DETBufferSizeC[k],
|
|
&v->UrgBurstFactorCursor[k],
|
|
&v->UrgBurstFactorLuma[k],
|
|
&v->UrgBurstFactorChroma[k],
|
|
&v->NoUrgentLatencyHiding[k]);
|
|
|
|
CalculateUrgentBurstFactor(
|
|
v->swath_width_luma_ub[k],
|
|
v->swath_width_chroma_ub[k],
|
|
v->SwathHeightY[k],
|
|
v->SwathHeightC[k],
|
|
v->HTotal[k] / v->PixelClock[k],
|
|
v->UrgentLatency,
|
|
v->CursorBufferSize,
|
|
v->CursorWidth[k][0],
|
|
v->CursorBPP[k][0],
|
|
v->VRatioPrefetchY[k],
|
|
v->VRatioPrefetchC[k],
|
|
v->BytePerPixelDETY[k],
|
|
v->BytePerPixelDETC[k],
|
|
v->DETBufferSizeY[k],
|
|
v->DETBufferSizeC[k],
|
|
&v->UrgBurstFactorCursorPre[k],
|
|
&v->UrgBurstFactorLumaPre[k],
|
|
&v->UrgBurstFactorChromaPre[k],
|
|
&v->NoUrgentLatencyHidingPre[k]);
|
|
|
|
MaxTotalRDBandwidth = MaxTotalRDBandwidth
|
|
+ dml_max3(
|
|
v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
|
|
v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
|
|
+ v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
|
|
+ v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
|
|
+ v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
|
|
v->DPPPerPlane[k]
|
|
* (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
|
|
+ v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
|
|
+ v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
|
|
|
|
MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
|
|
+ dml_max3(
|
|
v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
|
|
v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
|
|
+ v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
|
|
v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
|
|
+ v->cursor_bw_pre[k]);
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
|
|
dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
|
|
dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
|
|
dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
|
|
dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
|
|
|
|
dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
|
|
dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
|
|
|
|
dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
|
|
dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
|
|
dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
|
|
dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
|
|
dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
|
|
dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
|
|
dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
|
|
dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
|
|
dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
|
|
dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
|
|
#endif
|
|
|
|
if (v->DestinationLinesForPrefetch[k] < 2)
|
|
DestinationLineTimesForPrefetchLessThan2 = true;
|
|
|
|
if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
|
|
VRatioPrefetchMoreThan4 = true;
|
|
|
|
if (v->NoUrgentLatencyHiding[k] == true)
|
|
v->NoEnoughUrgentLatencyHiding = true;
|
|
|
|
if (v->NoUrgentLatencyHidingPre[k] == true)
|
|
v->NoEnoughUrgentLatencyHidingPre = true;
|
|
}
|
|
|
|
v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
|
|
dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
|
|
dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
|
|
#endif
|
|
|
|
if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
|
|
&& !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
|
|
v->PrefetchModeSupported = true;
|
|
else {
|
|
v->PrefetchModeSupported = false;
|
|
dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
|
|
dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
|
|
dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
|
|
dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
|
|
}
|
|
|
|
// PREVIOUS_ERROR
|
|
// This error result check was done after the PrefetchModeSupported. So we will
|
|
// still try to calculate flip schedule even prefetch mode not supported
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
|
|
v->PrefetchModeSupported = false;
|
|
dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
|
|
}
|
|
}
|
|
|
|
if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
|
|
v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
|
|
- dml_max(
|
|
v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
|
|
+ v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
|
|
+ v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
|
|
v->DPPPerPlane[k]
|
|
* (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
|
|
+ v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
|
|
+ v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
|
|
}
|
|
|
|
v->TotImmediateFlipBytes = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
|
|
+ v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
CalculateFlipSchedule(
|
|
mode_lib,
|
|
k,
|
|
HostVMInefficiencyFactor,
|
|
v->UrgentExtraLatency,
|
|
v->UrgentLatency,
|
|
v->PDEAndMetaPTEBytesFrame[k],
|
|
v->MetaRowByte[k],
|
|
v->PixelPTEBytesPerRow[k]);
|
|
}
|
|
|
|
v->total_dcn_read_bw_with_flip = 0.0;
|
|
v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
|
|
+ dml_max3(
|
|
v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
|
|
v->DPPPerPlane[k] * v->final_flip_bw[k]
|
|
+ v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
|
|
+ v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
|
|
+ v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
|
|
v->DPPPerPlane[k]
|
|
* (v->final_flip_bw[k]
|
|
+ v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
|
|
+ v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
|
|
+ v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
|
|
v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
|
|
+ dml_max3(
|
|
v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
|
|
v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
|
|
+ v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
|
|
v->DPPPerPlane[k]
|
|
* (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
|
|
+ v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
|
|
}
|
|
v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
|
|
|
|
v->ImmediateFlipSupported = true;
|
|
if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
|
|
#endif
|
|
v->ImmediateFlipSupported = false;
|
|
v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->ImmediateFlipSupportedForPipe[k] == false) {
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: Pipe %0d not supporting iflip\n",
|
|
__func__, k);
|
|
#endif
|
|
v->ImmediateFlipSupported = false;
|
|
}
|
|
}
|
|
} else {
|
|
v->ImmediateFlipSupported = false;
|
|
}
|
|
|
|
v->PrefetchAndImmediateFlipSupported =
|
|
(v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
|
|
&& v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
|
|
v->ImmediateFlipSupported)) ? true : false;
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
|
|
dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
|
|
dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
|
|
dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
|
|
dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
|
|
dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
|
|
#endif
|
|
dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
|
|
|
|
v->VStartupLines = v->VStartupLines + 1;
|
|
} while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
|
|
ASSERT(v->PrefetchAndImmediateFlipSupported);
|
|
|
|
// Unbounded Request Enabled
|
|
CalculateUnboundedRequestAndCompressedBufferSize(
|
|
v->DETBufferSizeInKByte[0],
|
|
v->ConfigReturnBufferSizeInKByte,
|
|
v->UseUnboundedRequesting,
|
|
v->TotalActiveDPP,
|
|
NoChromaPlanes,
|
|
v->MaxNumDPP,
|
|
v->CompressedBufferSegmentSizeInkByte,
|
|
v->Output,
|
|
&v->UnboundedRequestEnabled,
|
|
&v->CompressedBufferSizeInkByte);
|
|
|
|
//Watermarks and NB P-State/DRAM Clock Change Support
|
|
{
|
|
enum clock_change_support DRAMClockChangeSupport; // dummy
|
|
CalculateWatermarksAndDRAMSpeedChangeSupport(
|
|
mode_lib,
|
|
PrefetchMode,
|
|
v->DCFCLK,
|
|
v->ReturnBW,
|
|
v->UrgentLatency,
|
|
v->UrgentExtraLatency,
|
|
v->SOCCLK,
|
|
v->DCFCLKDeepSleep,
|
|
v->DETBufferSizeY,
|
|
v->DETBufferSizeC,
|
|
v->SwathHeightY,
|
|
v->SwathHeightC,
|
|
v->SwathWidthY,
|
|
v->SwathWidthC,
|
|
v->DPPPerPlane,
|
|
v->BytePerPixelDETY,
|
|
v->BytePerPixelDETC,
|
|
v->UnboundedRequestEnabled,
|
|
v->CompressedBufferSizeInkByte,
|
|
&DRAMClockChangeSupport,
|
|
&v->StutterExitWatermark,
|
|
&v->StutterEnterPlusExitWatermark,
|
|
&v->Z8StutterExitWatermark,
|
|
&v->Z8StutterEnterPlusExitWatermark);
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->WritebackEnable[k] == true) {
|
|
v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
|
|
0,
|
|
v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
|
|
} else {
|
|
v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
//Display Pipeline Delivery Time in Prefetch, Groups
|
|
CalculatePixelDeliveryTimes(
|
|
v->NumberOfActivePlanes,
|
|
v->VRatio,
|
|
v->VRatioChroma,
|
|
v->VRatioPrefetchY,
|
|
v->VRatioPrefetchC,
|
|
v->swath_width_luma_ub,
|
|
v->swath_width_chroma_ub,
|
|
v->DPPPerPlane,
|
|
v->HRatio,
|
|
v->HRatioChroma,
|
|
v->PixelClock,
|
|
v->PSCL_THROUGHPUT_LUMA,
|
|
v->PSCL_THROUGHPUT_CHROMA,
|
|
v->DPPCLK,
|
|
v->BytePerPixelC,
|
|
v->SourceScan,
|
|
v->NumberOfCursors,
|
|
v->CursorWidth,
|
|
v->CursorBPP,
|
|
v->BlockWidth256BytesY,
|
|
v->BlockHeight256BytesY,
|
|
v->BlockWidth256BytesC,
|
|
v->BlockHeight256BytesC,
|
|
v->DisplayPipeLineDeliveryTimeLuma,
|
|
v->DisplayPipeLineDeliveryTimeChroma,
|
|
v->DisplayPipeLineDeliveryTimeLumaPrefetch,
|
|
v->DisplayPipeLineDeliveryTimeChromaPrefetch,
|
|
v->DisplayPipeRequestDeliveryTimeLuma,
|
|
v->DisplayPipeRequestDeliveryTimeChroma,
|
|
v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
|
|
v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
|
|
v->CursorRequestDeliveryTime,
|
|
v->CursorRequestDeliveryTimePrefetch);
|
|
|
|
CalculateMetaAndPTETimes(
|
|
v->NumberOfActivePlanes,
|
|
v->GPUVMEnable,
|
|
v->MetaChunkSize,
|
|
v->MinMetaChunkSizeBytes,
|
|
v->HTotal,
|
|
v->VRatio,
|
|
v->VRatioChroma,
|
|
v->DestinationLinesToRequestRowInVBlank,
|
|
v->DestinationLinesToRequestRowInImmediateFlip,
|
|
v->DCCEnable,
|
|
v->PixelClock,
|
|
v->BytePerPixelY,
|
|
v->BytePerPixelC,
|
|
v->SourceScan,
|
|
v->dpte_row_height,
|
|
v->dpte_row_height_chroma,
|
|
v->meta_row_width,
|
|
v->meta_row_width_chroma,
|
|
v->meta_row_height,
|
|
v->meta_row_height_chroma,
|
|
v->meta_req_width,
|
|
v->meta_req_width_chroma,
|
|
v->meta_req_height,
|
|
v->meta_req_height_chroma,
|
|
v->dpte_group_bytes,
|
|
v->PTERequestSizeY,
|
|
v->PTERequestSizeC,
|
|
v->PixelPTEReqWidthY,
|
|
v->PixelPTEReqHeightY,
|
|
v->PixelPTEReqWidthC,
|
|
v->PixelPTEReqHeightC,
|
|
v->dpte_row_width_luma_ub,
|
|
v->dpte_row_width_chroma_ub,
|
|
v->DST_Y_PER_PTE_ROW_NOM_L,
|
|
v->DST_Y_PER_PTE_ROW_NOM_C,
|
|
v->DST_Y_PER_META_ROW_NOM_L,
|
|
v->DST_Y_PER_META_ROW_NOM_C,
|
|
v->TimePerMetaChunkNominal,
|
|
v->TimePerChromaMetaChunkNominal,
|
|
v->TimePerMetaChunkVBlank,
|
|
v->TimePerChromaMetaChunkVBlank,
|
|
v->TimePerMetaChunkFlip,
|
|
v->TimePerChromaMetaChunkFlip,
|
|
v->time_per_pte_group_nom_luma,
|
|
v->time_per_pte_group_vblank_luma,
|
|
v->time_per_pte_group_flip_luma,
|
|
v->time_per_pte_group_nom_chroma,
|
|
v->time_per_pte_group_vblank_chroma,
|
|
v->time_per_pte_group_flip_chroma);
|
|
|
|
CalculateVMGroupAndRequestTimes(
|
|
v->NumberOfActivePlanes,
|
|
v->GPUVMEnable,
|
|
v->GPUVMMaxPageTableLevels,
|
|
v->HTotal,
|
|
v->BytePerPixelC,
|
|
v->DestinationLinesToRequestVMInVBlank,
|
|
v->DestinationLinesToRequestVMInImmediateFlip,
|
|
v->DCCEnable,
|
|
v->PixelClock,
|
|
v->dpte_row_width_luma_ub,
|
|
v->dpte_row_width_chroma_ub,
|
|
v->vm_group_bytes,
|
|
v->dpde0_bytes_per_frame_ub_l,
|
|
v->dpde0_bytes_per_frame_ub_c,
|
|
v->meta_pte_bytes_per_frame_ub_l,
|
|
v->meta_pte_bytes_per_frame_ub_c,
|
|
v->TimePerVMGroupVBlank,
|
|
v->TimePerVMGroupFlip,
|
|
v->TimePerVMRequestVBlank,
|
|
v->TimePerVMRequestFlip);
|
|
|
|
// Min TTUVBlank
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (PrefetchMode == 0) {
|
|
v->AllowDRAMClockChangeDuringVBlank[k] = true;
|
|
v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
|
|
v->MinTTUVBlank[k] = dml_max(
|
|
v->DRAMClockChangeWatermark,
|
|
dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
|
|
} else if (PrefetchMode == 1) {
|
|
v->AllowDRAMClockChangeDuringVBlank[k] = false;
|
|
v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
|
|
v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
|
|
} else {
|
|
v->AllowDRAMClockChangeDuringVBlank[k] = false;
|
|
v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
|
|
v->MinTTUVBlank[k] = v->UrgentWatermark;
|
|
}
|
|
if (!v->DynamicMetadataEnable[k])
|
|
v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
|
|
}
|
|
|
|
// DCC Configuration
|
|
v->ActiveDPPs = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
|
|
v->SourcePixelFormat[k],
|
|
v->SurfaceWidthY[k],
|
|
v->SurfaceWidthC[k],
|
|
v->SurfaceHeightY[k],
|
|
v->SurfaceHeightC[k],
|
|
v->DETBufferSizeInKByte[k] * 1024,
|
|
v->BlockHeight256BytesY[k],
|
|
v->BlockHeight256BytesC[k],
|
|
v->SurfaceTiling[k],
|
|
v->BytePerPixelY[k],
|
|
v->BytePerPixelC[k],
|
|
v->BytePerPixelDETY[k],
|
|
v->BytePerPixelDETC[k],
|
|
v->SourceScan[k],
|
|
&v->DCCYMaxUncompressedBlock[k],
|
|
&v->DCCCMaxUncompressedBlock[k],
|
|
&v->DCCYMaxCompressedBlock[k],
|
|
&v->DCCCMaxCompressedBlock[k],
|
|
&v->DCCYIndependentBlock[k],
|
|
&v->DCCCIndependentBlock[k]);
|
|
}
|
|
|
|
// VStartup Adjustment
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
bool isInterlaceTiming;
|
|
double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
|
|
#endif
|
|
|
|
v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
|
|
dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
|
|
dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
|
|
dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
|
|
#endif
|
|
|
|
v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
|
|
if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
|
|
v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
|
|
}
|
|
|
|
isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
|
|
|
|
v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
|
|
- v->VFrontPorch[k])
|
|
+ dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
|
|
+ dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
|
|
|
|
v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
|
|
|
|
if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
|
|
<= (isInterlaceTiming ?
|
|
dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
|
|
(int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
|
|
v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
|
|
} else {
|
|
v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
|
|
}
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
|
|
dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
|
|
dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
|
|
dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
|
|
dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
|
|
dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
|
|
dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
|
|
dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
|
|
dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
|
|
dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
|
|
dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
|
|
#endif
|
|
}
|
|
|
|
{
|
|
//Maximum Bandwidth Used
|
|
double TotalWRBandwidth = 0;
|
|
double MaxPerPlaneVActiveWRBandwidth = 0;
|
|
double WRBandwidth = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
|
|
WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
|
|
/ (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
|
|
} else if (v->WritebackEnable[k] == true) {
|
|
WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
|
|
/ (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
|
|
}
|
|
TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
|
|
MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
|
|
}
|
|
|
|
v->TotalDataReadBandwidth = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
|
|
}
|
|
}
|
|
// Stutter Efficiency
|
|
CalculateStutterEfficiency(
|
|
mode_lib,
|
|
v->CompressedBufferSizeInkByte,
|
|
v->UnboundedRequestEnabled,
|
|
v->ConfigReturnBufferSizeInKByte,
|
|
v->MetaFIFOSizeInKEntries,
|
|
v->ZeroSizeBufferEntries,
|
|
v->NumberOfActivePlanes,
|
|
v->ROBBufferSizeInKByte,
|
|
v->TotalDataReadBandwidth,
|
|
v->DCFCLK,
|
|
v->ReturnBW,
|
|
v->COMPBUF_RESERVED_SPACE_64B,
|
|
v->COMPBUF_RESERVED_SPACE_ZS,
|
|
v->SRExitTime,
|
|
v->SRExitZ8Time,
|
|
v->SynchronizedVBlank,
|
|
v->StutterEnterPlusExitWatermark,
|
|
v->Z8StutterEnterPlusExitWatermark,
|
|
v->ProgressiveToInterlaceUnitInOPP,
|
|
v->Interlace,
|
|
v->MinTTUVBlank,
|
|
v->DPPPerPlane,
|
|
v->DETBufferSizeY,
|
|
v->BytePerPixelY,
|
|
v->BytePerPixelDETY,
|
|
v->SwathWidthY,
|
|
v->SwathHeightY,
|
|
v->SwathHeightC,
|
|
v->DCCRateLuma,
|
|
v->DCCRateChroma,
|
|
v->DCCFractionOfZeroSizeRequestsLuma,
|
|
v->DCCFractionOfZeroSizeRequestsChroma,
|
|
v->HTotal,
|
|
v->VTotal,
|
|
v->PixelClock,
|
|
v->VRatio,
|
|
v->SourceScan,
|
|
v->BlockHeight256BytesY,
|
|
v->BlockWidth256BytesY,
|
|
v->BlockHeight256BytesC,
|
|
v->BlockWidth256BytesC,
|
|
v->DCCYMaxUncompressedBlock,
|
|
v->DCCCMaxUncompressedBlock,
|
|
v->VActive,
|
|
v->DCCEnable,
|
|
v->WritebackEnable,
|
|
v->ReadBandwidthPlaneLuma,
|
|
v->ReadBandwidthPlaneChroma,
|
|
v->meta_row_bw,
|
|
v->dpte_row_bw,
|
|
&v->StutterEfficiencyNotIncludingVBlank,
|
|
&v->StutterEfficiency,
|
|
&v->NumberOfStutterBurstsPerFrame,
|
|
&v->Z8StutterEfficiencyNotIncludingVBlank,
|
|
&v->Z8StutterEfficiency,
|
|
&v->Z8NumberOfStutterBurstsPerFrame,
|
|
&v->StutterPeriod);
|
|
}
|
|
|
|
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
// Display Pipe Configuration
|
|
double BytePerPixDETY[DC__NUM_DPP__MAX];
|
|
double BytePerPixDETC[DC__NUM_DPP__MAX];
|
|
int BytePerPixY[DC__NUM_DPP__MAX];
|
|
int BytePerPixC[DC__NUM_DPP__MAX];
|
|
int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
|
|
int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
|
|
int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
|
|
int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
|
|
double dummy1[DC__NUM_DPP__MAX];
|
|
double dummy2[DC__NUM_DPP__MAX];
|
|
double dummy3[DC__NUM_DPP__MAX];
|
|
double dummy4[DC__NUM_DPP__MAX];
|
|
int dummy5[DC__NUM_DPP__MAX];
|
|
int dummy6[DC__NUM_DPP__MAX];
|
|
bool dummy7[DC__NUM_DPP__MAX];
|
|
bool dummysinglestring;
|
|
|
|
unsigned int k;
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
|
|
dml30_CalculateBytePerPixelAnd256BBlockSizes(
|
|
v->SourcePixelFormat[k],
|
|
v->SurfaceTiling[k],
|
|
&BytePerPixY[k],
|
|
&BytePerPixC[k],
|
|
&BytePerPixDETY[k],
|
|
&BytePerPixDETC[k],
|
|
&Read256BytesBlockHeightY[k],
|
|
&Read256BytesBlockHeightC[k],
|
|
&Read256BytesBlockWidthY[k],
|
|
&Read256BytesBlockWidthC[k]);
|
|
}
|
|
|
|
CalculateSwathAndDETConfiguration(
|
|
false,
|
|
v->NumberOfActivePlanes,
|
|
mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
|
|
v->DETBufferSizeInKByte,
|
|
dummy1,
|
|
dummy2,
|
|
v->SourceScan,
|
|
v->SourcePixelFormat,
|
|
v->SurfaceTiling,
|
|
v->ViewportWidth,
|
|
v->ViewportHeight,
|
|
v->SurfaceWidthY,
|
|
v->SurfaceWidthC,
|
|
v->SurfaceHeightY,
|
|
v->SurfaceHeightC,
|
|
Read256BytesBlockHeightY,
|
|
Read256BytesBlockHeightC,
|
|
Read256BytesBlockWidthY,
|
|
Read256BytesBlockWidthC,
|
|
v->ODMCombineEnabled,
|
|
v->BlendingAndTiming,
|
|
BytePerPixY,
|
|
BytePerPixC,
|
|
BytePerPixDETY,
|
|
BytePerPixDETC,
|
|
v->HActive,
|
|
v->HRatio,
|
|
v->HRatioChroma,
|
|
v->DPPPerPlane,
|
|
dummy5,
|
|
dummy6,
|
|
dummy3,
|
|
dummy4,
|
|
v->SwathHeightY,
|
|
v->SwathHeightC,
|
|
v->DETBufferSizeY,
|
|
v->DETBufferSizeC,
|
|
dummy7,
|
|
&dummysinglestring);
|
|
}
|
|
|
|
/*
 * Pick the wait time that corresponds to the requested prefetch mode.
 *
 *   PrefetchMode 0: dml_max() over (DRAMClockChangeLatency + UrgentLatency),
 *                   SREnterPlusExitTime and UrgentLatency
 *   PrefetchMode 1: dml_max() of SREnterPlusExitTime and UrgentLatency
 *   anything else:  UrgentLatency unchanged
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
|
|
|
|
double dml31_CalculateWriteBackDISPCLK(
|
|
enum source_format_class WritebackPixelFormat,
|
|
double PixelClock,
|
|
double WritebackHRatio,
|
|
double WritebackVRatio,
|
|
unsigned int WritebackHTaps,
|
|
unsigned int WritebackVTaps,
|
|
long WritebackSourceWidth,
|
|
long WritebackDestinationWidth,
|
|
unsigned int HTotal,
|
|
unsigned int WritebackLineBufferSize)
|
|
{
|
|
double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
|
|
|
|
DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
|
|
DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
|
|
DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
|
|
return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
|
|
}
|
|
|
|
static double CalculateWriteBackDelay(
|
|
enum source_format_class WritebackPixelFormat,
|
|
double WritebackHRatio,
|
|
double WritebackVRatio,
|
|
unsigned int WritebackVTaps,
|
|
int WritebackDestinationWidth,
|
|
int WritebackDestinationHeight,
|
|
int WritebackSourceHeight,
|
|
unsigned int HTotal)
|
|
{
|
|
double CalculateWriteBackDelay;
|
|
double Line_length;
|
|
double Output_lines_last_notclamped;
|
|
double WritebackVInit;
|
|
|
|
WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
|
|
Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
|
|
Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
|
|
if (Output_lines_last_notclamped < 0) {
|
|
CalculateWriteBackDelay = 0;
|
|
} else {
|
|
CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
|
|
}
|
|
return CalculateWriteBackDelay;
|
|
}
|
|
|
|
/*
 * Compute the VUPDATE / VREADY pixel quantities, the setup time built from
 * them, and the dynamic metadata timing terms.
 *
 * Outputs (all written unconditionally):
 *   *VUpdateWidthPix  - dml_ceil(.., 1.0) of
 *                       (14 / DCFClkDeepSleep + 12 / DPPCLK + repeater delay) * PixelClock
 *   *VReadyOffsetPix  - dml_ceil(.., 1.0) of
 *                       dml_max(150 / DPPCLK,
 *                               repeater delay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock
 *   *VUpdateOffsetPix - dml_ceil(HTotal / 4.0, 1)
 *   *TSetup           - sum of the three values above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4.0 / DISPCLK
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - VBlank * HTotal / PixelClock / 2.0 when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *                       DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *                       halved when InterlaceEnable is 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false
 *
 * where "repeater delay" is
 *   MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix point at doubles, so they are
	 * printed with %f like the other double values in this file; only
	 * VUpdateOffsetPix is an int.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
|
|
|
|
static void CalculateRowBandwidth(
|
|
bool GPUVMEnable,
|
|
enum source_format_class SourcePixelFormat,
|
|
double VRatio,
|
|
double VRatioChroma,
|
|
bool DCCEnable,
|
|
double LineTime,
|
|
unsigned int MetaRowByteLuma,
|
|
unsigned int MetaRowByteChroma,
|
|
unsigned int meta_row_height_luma,
|
|
unsigned int meta_row_height_chroma,
|
|
unsigned int PixelPTEBytesPerRowLuma,
|
|
unsigned int PixelPTEBytesPerRowChroma,
|
|
unsigned int dpte_row_height_luma,
|
|
unsigned int dpte_row_height_chroma,
|
|
double *meta_row_bw,
|
|
double *dpte_row_bw)
|
|
{
|
|
if (DCCEnable != true) {
|
|
*meta_row_bw = 0;
|
|
} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
|
|
*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
|
|
} else {
|
|
*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
|
|
}
|
|
|
|
if (GPUVMEnable != true) {
|
|
*dpte_row_bw = 0;
|
|
} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
|
|
*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
|
|
+ VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
|
|
} else {
|
|
*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
|
|
}
|
|
}
|
|
|
|
static void CalculateFlipSchedule(
|
|
struct display_mode_lib *mode_lib,
|
|
unsigned int k,
|
|
double HostVMInefficiencyFactor,
|
|
double UrgentExtraLatency,
|
|
double UrgentLatency,
|
|
double PDEAndMetaPTEBytesPerFrame,
|
|
double MetaRowBytes,
|
|
double DPTEBytesPerRow)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
double min_row_time = 0.0;
|
|
unsigned int HostVMDynamicLevelsTrips;
|
|
double TimeForFetchingMetaPTEImmediateFlip;
|
|
double TimeForFetchingRowInVBlankImmediateFlip;
|
|
double ImmediateFlipBW;
|
|
double LineTime = v->HTotal[k] / v->PixelClock[k];
|
|
|
|
if (v->GPUVMEnable == true && v->HostVMEnable == true) {
|
|
HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
|
|
} else {
|
|
HostVMDynamicLevelsTrips = 0;
|
|
}
|
|
|
|
if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
|
|
ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
|
|
}
|
|
|
|
if (v->GPUVMEnable == true) {
|
|
TimeForFetchingMetaPTEImmediateFlip = dml_max3(
|
|
v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
|
|
UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
|
|
LineTime / 4.0);
|
|
} else {
|
|
TimeForFetchingMetaPTEImmediateFlip = 0;
|
|
}
|
|
|
|
v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
|
|
if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
|
|
TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
|
|
(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
|
|
UrgentLatency * (HostVMDynamicLevelsTrips + 1),
|
|
LineTime / 4);
|
|
} else {
|
|
TimeForFetchingRowInVBlankImmediateFlip = 0;
|
|
}
|
|
|
|
v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
|
|
|
|
if (v->GPUVMEnable == true) {
|
|
v->final_flip_bw[k] = dml_max(
|
|
PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
|
|
(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
|
|
} else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
|
|
v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
|
|
} else {
|
|
v->final_flip_bw[k] = 0;
|
|
}
|
|
|
|
if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
|
|
if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
|
|
min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
|
|
} else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
|
|
min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
|
|
} else {
|
|
min_row_time = dml_min4(
|
|
v->dpte_row_height[k] * LineTime / v->VRatio[k],
|
|
v->meta_row_height[k] * LineTime / v->VRatio[k],
|
|
v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
|
|
v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
|
|
}
|
|
} else {
|
|
if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
|
|
min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
|
|
} else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
|
|
min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
|
|
} else {
|
|
min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
|
|
}
|
|
}
|
|
|
|
if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
|
|
|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
|
|
v->ImmediateFlipSupportedForPipe[k] = false;
|
|
} else {
|
|
v->ImmediateFlipSupportedForPipe[k] = true;
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
|
|
dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
|
|
dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
|
|
dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
|
|
dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
|
|
dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
|
|
#endif
|
|
|
|
}
|
|
|
|
static double TruncToValidBPP(
|
|
double LinkBitRate,
|
|
int Lanes,
|
|
int HTotal,
|
|
int HActive,
|
|
double PixelClock,
|
|
double DesiredBPP,
|
|
bool DSCEnable,
|
|
enum output_encoder_class Output,
|
|
enum output_format_class Format,
|
|
unsigned int DSCInputBitPerComponent,
|
|
int DSCSlices,
|
|
int AudioRate,
|
|
int AudioLayout,
|
|
enum odm_combine_mode ODMCombine)
|
|
{
|
|
double MaxLinkBPP;
|
|
int MinDSCBPP;
|
|
double MaxDSCBPP;
|
|
int NonDSCBPP0;
|
|
int NonDSCBPP1;
|
|
int NonDSCBPP2;
|
|
|
|
if (Format == dm_420) {
|
|
NonDSCBPP0 = 12;
|
|
NonDSCBPP1 = 15;
|
|
NonDSCBPP2 = 18;
|
|
MinDSCBPP = 6;
|
|
MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
|
|
} else if (Format == dm_444) {
|
|
NonDSCBPP0 = 24;
|
|
NonDSCBPP1 = 30;
|
|
NonDSCBPP2 = 36;
|
|
MinDSCBPP = 8;
|
|
MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
|
|
} else {
|
|
|
|
NonDSCBPP0 = 16;
|
|
NonDSCBPP1 = 20;
|
|
NonDSCBPP2 = 24;
|
|
|
|
if (Format == dm_n422) {
|
|
MinDSCBPP = 7;
|
|
MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
|
|
} else {
|
|
MinDSCBPP = 8;
|
|
MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
|
|
}
|
|
}
|
|
|
|
if (DSCEnable && Output == dm_dp) {
|
|
MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
|
|
} else {
|
|
MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
|
|
}
|
|
|
|
if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
|
|
MaxLinkBPP = 16;
|
|
} else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
|
|
MaxLinkBPP = 32;
|
|
}
|
|
|
|
if (DesiredBPP == 0) {
|
|
if (DSCEnable) {
|
|
if (MaxLinkBPP < MinDSCBPP) {
|
|
return BPP_INVALID;
|
|
} else if (MaxLinkBPP >= MaxDSCBPP) {
|
|
return MaxDSCBPP;
|
|
} else {
|
|
return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
|
|
}
|
|
} else {
|
|
if (MaxLinkBPP >= NonDSCBPP2) {
|
|
return NonDSCBPP2;
|
|
} else if (MaxLinkBPP >= NonDSCBPP1) {
|
|
return NonDSCBPP1;
|
|
} else if (MaxLinkBPP >= NonDSCBPP0) {
|
|
return 16.0;
|
|
} else {
|
|
return BPP_INVALID;
|
|
}
|
|
}
|
|
} else {
|
|
if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
|
|
|| (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
|
|
return BPP_INVALID;
|
|
} else {
|
|
return DesiredBPP;
|
|
}
|
|
}
|
|
return BPP_INVALID;
|
|
}
|
|
|
|
static noinline void CalculatePrefetchSchedulePerPlane(
|
|
struct display_mode_lib *mode_lib,
|
|
double HostVMInefficiencyFactor,
|
|
int i,
|
|
unsigned j,
|
|
unsigned k)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
Pipe myPipe;
|
|
|
|
myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
|
|
myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
|
|
myPipe.PixelClock = v->PixelClock[k];
|
|
myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
|
|
myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
|
|
myPipe.ScalerEnabled = v->ScalerEnabled[k];
|
|
myPipe.VRatio = mode_lib->vba.VRatio[k];
|
|
myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
|
|
|
|
myPipe.SourceScan = v->SourceScan[k];
|
|
myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
|
|
myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
|
|
myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
|
|
myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
|
|
myPipe.InterlaceEnable = v->Interlace[k];
|
|
myPipe.NumberOfCursors = v->NumberOfCursors[k];
|
|
myPipe.VBlank = v->VTotal[k] - v->VActive[k];
|
|
myPipe.HTotal = v->HTotal[k];
|
|
myPipe.DCCEnable = v->DCCEnable[k];
|
|
myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
|
|
|| v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
|
|
myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
|
|
myPipe.BytePerPixelY = v->BytePerPixelY[k];
|
|
myPipe.BytePerPixelC = v->BytePerPixelC[k];
|
|
myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
|
|
v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
|
|
mode_lib,
|
|
HostVMInefficiencyFactor,
|
|
&myPipe,
|
|
v->DSCDelayPerState[i][k],
|
|
v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
|
|
v->DPPCLKDelaySCL,
|
|
v->DPPCLKDelaySCLLBOnly,
|
|
v->DPPCLKDelayCNVCCursor,
|
|
v->DISPCLKDelaySubtotal,
|
|
v->SwathWidthYThisState[k] / v->HRatio[k],
|
|
v->OutputFormat[k],
|
|
v->MaxInterDCNTileRepeaters,
|
|
dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
|
|
v->MaximumVStartup[i][j][k],
|
|
v->GPUVMMaxPageTableLevels,
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->HostVMMaxNonCachedPageTableLevels,
|
|
v->HostVMMinPageSize,
|
|
v->DynamicMetadataEnable[k],
|
|
v->DynamicMetadataVMEnabled,
|
|
v->DynamicMetadataLinesBeforeActiveRequired[k],
|
|
v->DynamicMetadataTransmittedBytes[k],
|
|
v->UrgLatency[i],
|
|
v->ExtraLatency,
|
|
v->TimeCalc,
|
|
v->PDEAndMetaPTEBytesPerFrame[i][j][k],
|
|
v->MetaRowBytes[i][j][k],
|
|
v->DPTEBytesPerRow[i][j][k],
|
|
v->PrefetchLinesY[i][j][k],
|
|
v->SwathWidthYThisState[k],
|
|
v->PrefillY[k],
|
|
v->MaxNumSwY[k],
|
|
v->PrefetchLinesC[i][j][k],
|
|
v->SwathWidthCThisState[k],
|
|
v->PrefillC[k],
|
|
v->MaxNumSwC[k],
|
|
v->swath_width_luma_ub_this_state[k],
|
|
v->swath_width_chroma_ub_this_state[k],
|
|
v->SwathHeightYThisState[k],
|
|
v->SwathHeightCThisState[k],
|
|
v->TWait,
|
|
&v->DSTXAfterScaler[k],
|
|
&v->DSTYAfterScaler[k],
|
|
&v->LineTimesForPrefetch[k],
|
|
&v->PrefetchBW[k],
|
|
&v->LinesForMetaPTE[k],
|
|
&v->LinesForMetaAndDPTERow[k],
|
|
&v->VRatioPreY[i][j][k],
|
|
&v->VRatioPreC[i][j][k],
|
|
&v->RequiredPrefetchPixelDataBWLuma[i][j][k],
|
|
&v->RequiredPrefetchPixelDataBWChroma[i][j][k],
|
|
&v->NoTimeForDynamicMetadata[i][j][k],
|
|
&v->Tno_bw[k],
|
|
&v->prefetch_vmrow_bw[k],
|
|
&v->dummy7[k],
|
|
&v->dummy8[k],
|
|
&v->dummy13[k],
|
|
&v->VUpdateOffsetPix[k],
|
|
&v->VUpdateWidthPix[k],
|
|
&v->VReadyOffsetPix[k]);
|
|
}
|
|
|
|
static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[])
|
|
{
|
|
int i, total_pipes = 0;
|
|
for (i = 0; i < NumberOfActivePlanes; i++)
|
|
total_pipes += NoOfDPPThisState[i];
|
|
DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
|
|
if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE)
|
|
DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE;
|
|
for (i = 1; i < NumberOfActivePlanes; i++)
|
|
DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0];
|
|
}
|
|
|
|
|
|
void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
|
|
int i, j;
|
|
unsigned int k, m;
|
|
int ReorderingBytes;
|
|
int MinPrefetchMode = 0, MaxPrefetchMode = 2;
|
|
bool NoChroma = true;
|
|
bool EnoughWritebackUnits = true;
|
|
bool P2IWith420 = false;
|
|
bool DSCOnlyIfNecessaryWithBPP = false;
|
|
bool DSC422NativeNotSupported = false;
|
|
double MaxTotalVActiveRDBandwidth;
|
|
bool ViewportExceedsSurface = false;
|
|
bool FMTBufferExceeded = false;
|
|
|
|
/*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
|
|
|
|
CalculateMinAndMaxPrefetchMode(
|
|
mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
|
|
&MinPrefetchMode, &MaxPrefetchMode);
|
|
|
|
/*Scale Ratio, taps Support Check*/
|
|
|
|
v->ScaleRatioAndTapsSupport = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->ScalerEnabled[k] == false
|
|
&& ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
|
|
&& v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
|
|
&& v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
|
|
&& v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
|
|
|| v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
|
|
v->ScaleRatioAndTapsSupport = false;
|
|
} else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
|
|
|| (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
|
|
|| v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
|
|
|| v->VRatio[k] > v->vtaps[k]
|
|
|| (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
|
|
&& v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
|
|
&& v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
|
|
&& (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
|
|
|| v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
|
|
|| v->HRatioChroma[k] > v->MaxHSCLRatio
|
|
|| v->VRatioChroma[k] > v->MaxVSCLRatio
|
|
|| v->HRatioChroma[k] > v->HTAPsChroma[k]
|
|
|| v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
|
|
v->ScaleRatioAndTapsSupport = false;
|
|
}
|
|
}
|
|
/*Source Format, Pixel Format and Scan Support Check*/
|
|
|
|
v->SourceFormatPixelAndScanSupport = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
|
|
|| ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
|
|
|| v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
|
|
v->SourceFormatPixelAndScanSupport = false;
|
|
}
|
|
}
|
|
/*Bandwidth Support Check*/
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
dml30_CalculateBytePerPixelAnd256BBlockSizes(
|
|
v->SourcePixelFormat[k],
|
|
v->SurfaceTiling[k],
|
|
&v->BytePerPixelY[k],
|
|
&v->BytePerPixelC[k],
|
|
&v->BytePerPixelInDETY[k],
|
|
&v->BytePerPixelInDETC[k],
|
|
&v->Read256BlockHeightY[k],
|
|
&v->Read256BlockHeightC[k],
|
|
&v->Read256BlockWidthY[k],
|
|
&v->Read256BlockWidthC[k]);
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->SourceScan[k] != dm_vert) {
|
|
v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
|
|
v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
|
|
} else {
|
|
v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
|
|
v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
|
|
}
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
|
|
/ (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
|
|
v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
|
|
/ (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
|
|
v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
|
|
/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
|
|
} else if (v->WritebackEnable[k] == true) {
|
|
v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
|
|
/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
|
|
} else {
|
|
v->WriteBandwidth[k] = 0.0;
|
|
}
|
|
}
|
|
|
|
/*Writeback Latency support check*/
|
|
|
|
v->WritebackLatencySupport = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
|
|
v->WritebackLatencySupport = false;
|
|
}
|
|
}
|
|
|
|
/*Writeback Mode Support Check*/
|
|
|
|
v->TotalNumberOfActiveWriteback = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->WritebackEnable[k] == true) {
|
|
v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
|
|
}
|
|
}
|
|
|
|
if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
|
|
EnoughWritebackUnits = false;
|
|
}
|
|
|
|
/*Writeback Scale Ratio and Taps Support Check*/
|
|
|
|
v->WritebackScaleRatioAndTapsSupport = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->WritebackEnable[k] == true) {
|
|
if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
|
|
|| v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
|
|
|| v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
|
|
|| v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
|
|
|| v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
|
|
|| v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
|
|
|| (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
|
|
v->WritebackScaleRatioAndTapsSupport = false;
|
|
}
|
|
if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
|
|
v->WritebackScaleRatioAndTapsSupport = false;
|
|
}
|
|
}
|
|
}
|
|
/*Maximum DISPCLK/DPPCLK Support check*/
|
|
|
|
v->WritebackRequiredDISPCLK = 0.0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->WritebackEnable[k] == true) {
|
|
v->WritebackRequiredDISPCLK = dml_max(
|
|
v->WritebackRequiredDISPCLK,
|
|
dml31_CalculateWriteBackDISPCLK(
|
|
v->WritebackPixelFormat[k],
|
|
v->PixelClock[k],
|
|
v->WritebackHRatio[k],
|
|
v->WritebackVRatio[k],
|
|
v->WritebackHTaps[k],
|
|
v->WritebackVTaps[k],
|
|
v->WritebackSourceWidth[k],
|
|
v->WritebackDestinationWidth[k],
|
|
v->HTotal[k],
|
|
v->WritebackLineBufferSize));
|
|
}
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->HRatio[k] > 1.0) {
|
|
v->PSCL_FACTOR[k] = dml_min(
|
|
v->MaxDCHUBToPSCLThroughput,
|
|
v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
|
|
} else {
|
|
v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
|
|
}
|
|
if (v->BytePerPixelC[k] == 0.0) {
|
|
v->PSCL_FACTOR_CHROMA[k] = 0.0;
|
|
v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
|
|
* dml_max3(
|
|
v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
|
|
v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
|
|
1.0);
|
|
if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
|
|
v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
|
|
}
|
|
} else {
|
|
if (v->HRatioChroma[k] > 1.0) {
|
|
v->PSCL_FACTOR_CHROMA[k] = dml_min(
|
|
v->MaxDCHUBToPSCLThroughput,
|
|
v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
|
|
} else {
|
|
v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
|
|
}
|
|
v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
|
|
* dml_max5(
|
|
v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
|
|
v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
|
|
v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
|
|
v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
|
|
1.0);
|
|
if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
|
|
&& v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
|
|
v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
|
|
}
|
|
}
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
int MaximumSwathWidthSupportLuma;
|
|
int MaximumSwathWidthSupportChroma;
|
|
|
|
if (v->SurfaceTiling[k] == dm_sw_linear) {
|
|
MaximumSwathWidthSupportLuma = 8192.0;
|
|
} else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
|
|
MaximumSwathWidthSupportLuma = 2880.0;
|
|
} else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
|
|
MaximumSwathWidthSupportLuma = 3840.0;
|
|
} else {
|
|
MaximumSwathWidthSupportLuma = 5760.0;
|
|
}
|
|
|
|
if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
|
|
MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
|
|
} else {
|
|
MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
|
|
}
|
|
v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
|
|
/ (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
|
|
if (v->BytePerPixelC[k] == 0.0) {
|
|
v->MaximumSwathWidthInLineBufferChroma = 0;
|
|
} else {
|
|
v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
|
|
/ (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
|
|
}
|
|
v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
|
|
v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
|
|
}
|
|
|
|
CalculateSwathAndDETConfiguration(
|
|
true,
|
|
v->NumberOfActivePlanes,
|
|
mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
|
|
v->DETBufferSizeInKByte,
|
|
v->MaximumSwathWidthLuma,
|
|
v->MaximumSwathWidthChroma,
|
|
v->SourceScan,
|
|
v->SourcePixelFormat,
|
|
v->SurfaceTiling,
|
|
v->ViewportWidth,
|
|
v->ViewportHeight,
|
|
v->SurfaceWidthY,
|
|
v->SurfaceWidthC,
|
|
v->SurfaceHeightY,
|
|
v->SurfaceHeightC,
|
|
v->Read256BlockHeightY,
|
|
v->Read256BlockHeightC,
|
|
v->Read256BlockWidthY,
|
|
v->Read256BlockWidthC,
|
|
v->odm_combine_dummy,
|
|
v->BlendingAndTiming,
|
|
v->BytePerPixelY,
|
|
v->BytePerPixelC,
|
|
v->BytePerPixelInDETY,
|
|
v->BytePerPixelInDETC,
|
|
v->HActive,
|
|
v->HRatio,
|
|
v->HRatioChroma,
|
|
v->NoOfDPPThisState,
|
|
v->swath_width_luma_ub_this_state,
|
|
v->swath_width_chroma_ub_this_state,
|
|
v->SwathWidthYThisState,
|
|
v->SwathWidthCThisState,
|
|
v->SwathHeightYThisState,
|
|
v->SwathHeightCThisState,
|
|
v->DETBufferSizeYThisState,
|
|
v->DETBufferSizeCThisState,
|
|
v->SingleDPPViewportSizeSupportPerPlane,
|
|
&v->ViewportSizeSupport[0][0]);
|
|
|
|
for (i = 0; i < v->soc.num_states; i++) {
|
|
for (j = 0; j < 2; j++) {
|
|
v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
|
|
v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
|
|
v->RequiredDISPCLK[i][j] = 0.0;
|
|
v->DISPCLK_DPPCLK_Support[i][j] = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
|
|
* (1.0 + v->DISPCLKRampingMargin / 100.0);
|
|
if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
|
|
&& v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
|
|
&& v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
|
|
v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
|
|
* (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
|
|
}
|
|
v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
|
|
* (1 + v->DISPCLKRampingMargin / 100.0);
|
|
if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
|
|
&& v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
|
|
&& v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
|
|
v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
|
|
* (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
|
|
}
|
|
v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
|
|
* (1 + v->DISPCLKRampingMargin / 100.0);
|
|
if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
|
|
&& v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
|
|
&& v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
|
|
v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
|
|
* (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
|
|
}
|
|
|
|
if (v->ODMCombinePolicy == dm_odm_combine_policy_none
|
|
|| !(v->Output[k] == dm_dp ||
|
|
v->Output[k] == dm_dp2p0 ||
|
|
v->Output[k] == dm_edp)) {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
|
|
|
|
if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
|
|
FMTBufferExceeded = true;
|
|
} else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
|
|
} else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
|
|
|| v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
|
|
} else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
|
|
} else {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
|
|
}
|
|
if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
|
|
&& v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
|
|
if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
|
|
} else {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
|
|
}
|
|
}
|
|
if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
|
|
&& v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
|
|
if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
|
|
|
|
if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
|
|
FMTBufferExceeded = true;
|
|
} else {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
|
|
v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
|
|
}
|
|
}
|
|
if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
|
|
v->MPCCombine[i][j][k] = false;
|
|
v->NoOfDPP[i][j][k] = 4;
|
|
v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
|
|
} else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
|
|
v->MPCCombine[i][j][k] = false;
|
|
v->NoOfDPP[i][j][k] = 2;
|
|
v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
|
|
} else if ((v->WhenToDoMPCCombine == dm_mpc_never
|
|
|| (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
|
|
<= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
|
|
v->MPCCombine[i][j][k] = false;
|
|
v->NoOfDPP[i][j][k] = 1;
|
|
v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
|
|
} else {
|
|
v->MPCCombine[i][j][k] = true;
|
|
v->NoOfDPP[i][j][k] = 2;
|
|
v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
|
|
}
|
|
v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
|
|
if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
|
|
> v->MaxDppclkRoundedDownToDFSGranularity)
|
|
|| (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
|
|
v->DISPCLK_DPPCLK_Support[i][j] = false;
|
|
}
|
|
if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) {
|
|
v->MPCCombine[i][j][k] = true;
|
|
v->NoOfDPP[i][j][k] = 2;
|
|
}
|
|
}
|
|
v->TotalNumberOfActiveDPP[i][j] = 0;
|
|
v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
|
|
if (v->NoOfDPP[i][j][k] == 1)
|
|
v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
|
|
if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
|
|
|| v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
|
|
NoChroma = false;
|
|
}
|
|
|
|
// UPTO
|
|
if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
|
|
&& !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
|
|
while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
|
|
double BWOfNonSplitPlaneOfMaximumBandwidth;
|
|
unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
|
|
BWOfNonSplitPlaneOfMaximumBandwidth = 0;
|
|
NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
|
|
&& v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
|
|
BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
|
|
NumberOfNonSplitPlaneOfMaximumBandwidth = k;
|
|
}
|
|
}
|
|
v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
|
|
v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
|
|
v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
|
|
v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
|
|
* (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
|
|
v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
|
|
v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
|
|
}
|
|
}
|
|
if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
|
|
v->RequiredDISPCLK[i][j] = 0.0;
|
|
v->DISPCLK_DPPCLK_Support[i][j] = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
|
|
if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
|
|
v->MPCCombine[i][j][k] = true;
|
|
v->NoOfDPP[i][j][k] = 2;
|
|
v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
|
|
* (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
|
|
} else {
|
|
v->MPCCombine[i][j][k] = false;
|
|
v->NoOfDPP[i][j][k] = 1;
|
|
v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
|
|
* (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
|
|
}
|
|
if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
|
|
&& v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
|
|
v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
|
|
* (1.0 + v->DISPCLKRampingMargin / 100.0);
|
|
} else {
|
|
v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
|
|
}
|
|
v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
|
|
if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
|
|
> v->MaxDppclkRoundedDownToDFSGranularity)
|
|
|| (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
|
|
v->DISPCLK_DPPCLK_Support[i][j] = false;
|
|
}
|
|
}
|
|
v->TotalNumberOfActiveDPP[i][j] = 0.0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
|
|
}
|
|
}
|
|
v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
|
|
if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
|
|
v->DISPCLK_DPPCLK_Support[i][j] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*Total Available Pipes Support Check*/
|
|
|
|
for (i = 0; i < v->soc.num_states; i++) {
|
|
for (j = 0; j < 2; j++) {
|
|
if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
|
|
v->TotalAvailablePipesSupport[i][j] = true;
|
|
} else {
|
|
v->TotalAvailablePipesSupport[i][j] = false;
|
|
}
|
|
}
|
|
}
|
|
/*Display IO and DSC Support Check*/
|
|
|
|
v->NonsupportedDSCInputBPC = false;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
|
|
|| v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
|
|
v->NonsupportedDSCInputBPC = true;
|
|
}
|
|
}
|
|
|
|
/*Number Of DSC Slices*/
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->BlendingAndTiming[k] == k) {
|
|
if (v->PixelClockBackEnd[k] > 3200) {
|
|
v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
|
|
} else if (v->PixelClockBackEnd[k] > 1360) {
|
|
v->NumberOfDSCSlices[k] = 8;
|
|
} else if (v->PixelClockBackEnd[k] > 680) {
|
|
v->NumberOfDSCSlices[k] = 4;
|
|
} else if (v->PixelClockBackEnd[k] > 340) {
|
|
v->NumberOfDSCSlices[k] = 2;
|
|
} else {
|
|
v->NumberOfDSCSlices[k] = 1;
|
|
}
|
|
} else {
|
|
v->NumberOfDSCSlices[k] = 0;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < v->soc.num_states; i++) {
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->RequiresDSC[i][k] = false;
|
|
v->RequiresFEC[i][k] = false;
|
|
if (v->BlendingAndTiming[k] == k) {
|
|
if (v->Output[k] == dm_hdmi) {
|
|
v->RequiresDSC[i][k] = false;
|
|
v->RequiresFEC[i][k] = false;
|
|
v->OutputBppPerState[i][k] = TruncToValidBPP(
|
|
dml_min(600.0, v->PHYCLKPerState[i]) * 10,
|
|
3,
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
false,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
} else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
|
|
if (v->DSCEnable[k] == true) {
|
|
v->RequiresDSC[i][k] = true;
|
|
v->LinkDSCEnable = true;
|
|
if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
|
|
v->RequiresFEC[i][k] = true;
|
|
} else {
|
|
v->RequiresFEC[i][k] = false;
|
|
}
|
|
} else {
|
|
v->RequiresDSC[i][k] = false;
|
|
v->LinkDSCEnable = false;
|
|
if (v->Output[k] == dm_dp2p0) {
|
|
v->RequiresFEC[i][k] = true;
|
|
} else {
|
|
v->RequiresFEC[i][k] = false;
|
|
}
|
|
}
|
|
if (v->Output[k] == dm_dp2p0) {
|
|
v->Outbpp = BPP_INVALID;
|
|
if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
|
|
v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 10000,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
|
|
v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
|
|
v->RequiresDSC[i][k] = true;
|
|
v->LinkDSCEnable = true;
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 10000,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
}
|
|
v->OutputBppPerState[i][k] = v->Outbpp;
|
|
// TODO: Need some other way to handle this nonsense
|
|
// v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
|
|
}
|
|
if (v->Outbpp == BPP_INVALID &&
|
|
(v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
|
|
v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 13500,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
|
|
v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
|
|
v->RequiresDSC[i][k] = true;
|
|
v->LinkDSCEnable = true;
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 13500,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
}
|
|
v->OutputBppPerState[i][k] = v->Outbpp;
|
|
// TODO: Need some other way to handle this nonsense
|
|
// v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
|
|
}
|
|
if (v->Outbpp == BPP_INVALID &&
|
|
(v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
|
|
v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 20000,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
|
|
v->ForcedOutputLinkBPP[k] == 0) {
|
|
v->RequiresDSC[i][k] = true;
|
|
v->LinkDSCEnable = true;
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 20000,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
}
|
|
v->OutputBppPerState[i][k] = v->Outbpp;
|
|
// TODO: Need some other way to handle this nonsense
|
|
// v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
|
|
}
|
|
} else {
|
|
v->Outbpp = BPP_INVALID;
|
|
if (v->PHYCLKPerState[i] >= 270.0) {
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 2700,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
v->OutputBppPerState[i][k] = v->Outbpp;
|
|
// TODO: Need some other way to handle this nonsense
|
|
// v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
|
|
}
|
|
if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 5400,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
v->OutputBppPerState[i][k] = v->Outbpp;
|
|
// TODO: Need some other way to handle this nonsense
|
|
// v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
|
|
}
|
|
if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
|
|
v->Outbpp = TruncToValidBPP(
|
|
(1.0 - v->Downspreading / 100.0) * 8100,
|
|
v->OutputLinkDPLanes[k],
|
|
v->HTotal[k],
|
|
v->HActive[k],
|
|
v->PixelClockBackEnd[k],
|
|
v->ForcedOutputLinkBPP[k],
|
|
v->LinkDSCEnable,
|
|
v->Output[k],
|
|
v->OutputFormat[k],
|
|
v->DSCInputBitPerComponent[k],
|
|
v->NumberOfDSCSlices[k],
|
|
v->AudioSampleRate[k],
|
|
v->AudioSampleLayout[k],
|
|
v->ODMCombineEnablePerState[i][k]);
|
|
v->OutputBppPerState[i][k] = v->Outbpp;
|
|
// TODO: Need some other way to handle this nonsense
|
|
// v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
v->OutputBppPerState[i][k] = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < v->soc.num_states; i++) {
|
|
v->LinkCapacitySupport[i] = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->BlendingAndTiming[k] == k
|
|
&& (v->Output[k] == dm_dp ||
|
|
v->Output[k] == dm_edp ||
|
|
v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
|
|
v->LinkCapacitySupport[i] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// UPTO 2172
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->BlendingAndTiming[k] == k
|
|
&& (v->Output[k] == dm_dp ||
|
|
v->Output[k] == dm_edp ||
|
|
v->Output[k] == dm_hdmi)) {
|
|
if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
|
|
P2IWith420 = true;
|
|
}
|
|
if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
|
|
&& !v->DSC422NativeSupport) {
|
|
DSC422NativeNotSupported = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
v->ODMCombine4To1SupportCheckOK[i] = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
|
|
&& (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
|
|
|| v->Output[k] == dm_hdmi)) {
|
|
v->ODMCombine4To1SupportCheckOK[i] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
|
|
|
|
for (i = 0; i < v->soc.num_states; i++) {
|
|
v->NotEnoughDSCUnits[i] = false;
|
|
v->TotalDSCUnitsRequired = 0.0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->RequiresDSC[i][k] == true) {
|
|
if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
|
|
v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
|
|
} else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
|
|
v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
|
|
} else {
|
|
v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
|
|
}
|
|
}
|
|
}
|
|
if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
|
|
v->NotEnoughDSCUnits[i] = true;
|
|
}
|
|
}
|
|
/*DSC Delay per state*/
|
|
|
|
for (i = 0; i < v->soc.num_states; i++) {
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->OutputBppPerState[i][k] == BPP_INVALID) {
|
|
v->BPP = 0.0;
|
|
} else {
|
|
v->BPP = v->OutputBppPerState[i][k];
|
|
}
|
|
if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
|
|
if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
|
|
v->DSCDelayPerState[i][k] = dscceComputeDelay(
|
|
v->DSCInputBitPerComponent[k],
|
|
v->BPP,
|
|
dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
|
|
v->NumberOfDSCSlices[k],
|
|
v->OutputFormat[k],
|
|
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
|
|
} else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
|
|
v->DSCDelayPerState[i][k] = 2.0
|
|
* (dscceComputeDelay(
|
|
v->DSCInputBitPerComponent[k],
|
|
v->BPP,
|
|
dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
|
|
v->NumberOfDSCSlices[k] / 2,
|
|
v->OutputFormat[k],
|
|
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
|
|
} else {
|
|
v->DSCDelayPerState[i][k] = 4.0
|
|
* (dscceComputeDelay(
|
|
v->DSCInputBitPerComponent[k],
|
|
v->BPP,
|
|
dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
|
|
v->NumberOfDSCSlices[k] / 4,
|
|
v->OutputFormat[k],
|
|
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
|
|
}
|
|
v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
|
|
} else {
|
|
v->DSCDelayPerState[i][k] = 0.0;
|
|
}
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
for (m = 0; m < v->NumberOfActivePlanes; m++) {
|
|
if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
|
|
v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//Calculate Swath, DET Configuration, DCFCLKDeepSleep
|
|
//
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
for (j = 0; j <= 1; ++j) {
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
|
|
v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
|
|
v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
|
|
}
|
|
|
|
if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0])
|
|
PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte);
|
|
CalculateSwathAndDETConfiguration(
|
|
false,
|
|
v->NumberOfActivePlanes,
|
|
mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
|
|
v->DETBufferSizeInKByte,
|
|
v->MaximumSwathWidthLuma,
|
|
v->MaximumSwathWidthChroma,
|
|
v->SourceScan,
|
|
v->SourcePixelFormat,
|
|
v->SurfaceTiling,
|
|
v->ViewportWidth,
|
|
v->ViewportHeight,
|
|
v->SurfaceWidthY,
|
|
v->SurfaceWidthC,
|
|
v->SurfaceHeightY,
|
|
v->SurfaceHeightC,
|
|
v->Read256BlockHeightY,
|
|
v->Read256BlockHeightC,
|
|
v->Read256BlockWidthY,
|
|
v->Read256BlockWidthC,
|
|
v->ODMCombineEnableThisState,
|
|
v->BlendingAndTiming,
|
|
v->BytePerPixelY,
|
|
v->BytePerPixelC,
|
|
v->BytePerPixelInDETY,
|
|
v->BytePerPixelInDETC,
|
|
v->HActive,
|
|
v->HRatio,
|
|
v->HRatioChroma,
|
|
v->NoOfDPPThisState,
|
|
v->swath_width_luma_ub_this_state,
|
|
v->swath_width_chroma_ub_this_state,
|
|
v->SwathWidthYThisState,
|
|
v->SwathWidthCThisState,
|
|
v->SwathHeightYThisState,
|
|
v->SwathHeightCThisState,
|
|
v->DETBufferSizeYThisState,
|
|
v->DETBufferSizeCThisState,
|
|
v->dummystring,
|
|
&v->ViewportSizeSupport[i][j]);
|
|
|
|
CalculateDCFCLKDeepSleep(
|
|
mode_lib,
|
|
v->NumberOfActivePlanes,
|
|
v->BytePerPixelY,
|
|
v->BytePerPixelC,
|
|
v->VRatio,
|
|
v->VRatioChroma,
|
|
v->SwathWidthYThisState,
|
|
v->SwathWidthCThisState,
|
|
v->NoOfDPPThisState,
|
|
v->HRatio,
|
|
v->HRatioChroma,
|
|
v->PixelClock,
|
|
v->PSCL_FACTOR,
|
|
v->PSCL_FACTOR_CHROMA,
|
|
v->RequiredDPPCLKThisState,
|
|
v->ReadBandwidthLuma,
|
|
v->ReadBandwidthChroma,
|
|
v->ReturnBusWidth,
|
|
&v->ProjectedDCFCLKDeepSleep[i][j]);
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
|
|
v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
|
|
v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
|
|
v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
|
|
v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
|
|
v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
|
|
v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
|
|
v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
|
|
}
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
|
|
/ (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
|
|
}
|
|
|
|
for (i = 0; i < v->soc.num_states; i++) {
|
|
for (j = 0; j < 2; j++) {
|
|
bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
|
|
v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
|
|
v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
|
|
v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
|
|
v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
|
|
v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
|
|
v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
|
|
v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
|
|
}
|
|
|
|
v->TotalNumberOfDCCActiveDPP[i][j] = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->DCCEnable[k] == true) {
|
|
v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
|
|
|| v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
|
|
|
|
if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
|
|
&& v->SourceScan[k] != dm_vert) {
|
|
v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
|
|
/ 2;
|
|
v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
|
|
} else {
|
|
v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
|
|
v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
|
|
}
|
|
|
|
v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
|
|
mode_lib,
|
|
v->DCCEnable[k],
|
|
v->Read256BlockHeightC[k],
|
|
v->Read256BlockWidthC[k],
|
|
v->SourcePixelFormat[k],
|
|
v->SurfaceTiling[k],
|
|
v->BytePerPixelC[k],
|
|
v->SourceScan[k],
|
|
v->SwathWidthCThisState[k],
|
|
v->ViewportHeightChroma[k],
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->HostVMMaxNonCachedPageTableLevels,
|
|
v->GPUVMMinPageSize,
|
|
v->HostVMMinPageSize,
|
|
v->PTEBufferSizeInRequestsForChroma,
|
|
v->PitchC[k],
|
|
0.0,
|
|
&v->MacroTileWidthC[k],
|
|
&v->MetaRowBytesC,
|
|
&v->DPTEBytesPerRowC,
|
|
&v->PTEBufferSizeNotExceededC[i][j][k],
|
|
&v->dummyinteger7,
|
|
&v->dpte_row_height_chroma[k],
|
|
&v->dummyinteger28,
|
|
&v->dummyinteger26,
|
|
&v->dummyinteger23,
|
|
&v->meta_row_height_chroma[k],
|
|
&v->dummyinteger8,
|
|
&v->dummyinteger9,
|
|
&v->dummyinteger19,
|
|
&v->dummyinteger20,
|
|
&v->dummyinteger17,
|
|
&v->dummyinteger10,
|
|
&v->dummyinteger11);
|
|
|
|
v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
|
|
mode_lib,
|
|
v->VRatioChroma[k],
|
|
v->VTAPsChroma[k],
|
|
v->Interlace[k],
|
|
v->ProgressiveToInterlaceUnitInOPP,
|
|
v->SwathHeightCThisState[k],
|
|
v->ViewportYStartC[k],
|
|
&v->PrefillC[k],
|
|
&v->MaxNumSwC[k]);
|
|
} else {
|
|
v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
|
|
v->PTEBufferSizeInRequestsForChroma = 0;
|
|
v->PDEAndMetaPTEBytesPerFrameC = 0.0;
|
|
v->MetaRowBytesC = 0.0;
|
|
v->DPTEBytesPerRowC = 0.0;
|
|
v->PrefetchLinesC[i][j][k] = 0.0;
|
|
v->PTEBufferSizeNotExceededC[i][j][k] = true;
|
|
}
|
|
v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
|
|
mode_lib,
|
|
v->DCCEnable[k],
|
|
v->Read256BlockHeightY[k],
|
|
v->Read256BlockWidthY[k],
|
|
v->SourcePixelFormat[k],
|
|
v->SurfaceTiling[k],
|
|
v->BytePerPixelY[k],
|
|
v->SourceScan[k],
|
|
v->SwathWidthYThisState[k],
|
|
v->ViewportHeight[k],
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->HostVMMaxNonCachedPageTableLevels,
|
|
v->GPUVMMinPageSize,
|
|
v->HostVMMinPageSize,
|
|
v->PTEBufferSizeInRequestsForLuma,
|
|
v->PitchY[k],
|
|
v->DCCMetaPitchY[k],
|
|
&v->MacroTileWidthY[k],
|
|
&v->MetaRowBytesY,
|
|
&v->DPTEBytesPerRowY,
|
|
&v->PTEBufferSizeNotExceededY[i][j][k],
|
|
&v->dummyinteger7,
|
|
&v->dpte_row_height[k],
|
|
&v->dummyinteger29,
|
|
&v->dummyinteger27,
|
|
&v->dummyinteger24,
|
|
&v->meta_row_height[k],
|
|
&v->dummyinteger25,
|
|
&v->dpte_group_bytes[k],
|
|
&v->dummyinteger21,
|
|
&v->dummyinteger22,
|
|
&v->dummyinteger18,
|
|
&v->dummyinteger5,
|
|
&v->dummyinteger6);
|
|
v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
|
|
mode_lib,
|
|
v->VRatio[k],
|
|
v->vtaps[k],
|
|
v->Interlace[k],
|
|
v->ProgressiveToInterlaceUnitInOPP,
|
|
v->SwathHeightYThisState[k],
|
|
v->ViewportYStartY[k],
|
|
&v->PrefillY[k],
|
|
&v->MaxNumSwY[k]);
|
|
v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
|
|
v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
|
|
v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
|
|
|
|
CalculateRowBandwidth(
|
|
v->GPUVMEnable,
|
|
v->SourcePixelFormat[k],
|
|
v->VRatio[k],
|
|
v->VRatioChroma[k],
|
|
v->DCCEnable[k],
|
|
v->HTotal[k] / v->PixelClock[k],
|
|
v->MetaRowBytesY,
|
|
v->MetaRowBytesC,
|
|
v->meta_row_height[k],
|
|
v->meta_row_height_chroma[k],
|
|
v->DPTEBytesPerRowY,
|
|
v->DPTEBytesPerRowC,
|
|
v->dpte_row_height[k],
|
|
v->dpte_row_height_chroma[k],
|
|
&v->meta_row_bandwidth[i][j][k],
|
|
&v->dpte_row_bandwidth[i][j][k]);
|
|
}
|
|
/*DCCMetaBufferSizeSupport(i, j) = True
|
|
For k = 0 To NumberOfActivePlanes - 1
|
|
If MetaRowBytes(i, j, k) > 24064 Then
|
|
DCCMetaBufferSizeSupport(i, j) = False
|
|
End If
|
|
Next k*/
|
|
v->DCCMetaBufferSizeSupport[i][j] = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->MetaRowBytes[i][j][k] > 24064)
|
|
v->DCCMetaBufferSizeSupport[i][j] = false;
|
|
}
|
|
v->UrgLatency[i] = CalculateUrgentLatency(
|
|
v->UrgentLatencyPixelDataOnly,
|
|
v->UrgentLatencyPixelMixedWithVMData,
|
|
v->UrgentLatencyVMDataOnly,
|
|
v->DoUrgentLatencyAdjustment,
|
|
v->UrgentLatencyAdjustmentFabricClockComponent,
|
|
v->UrgentLatencyAdjustmentFabricClockReference,
|
|
v->FabricClockPerState[i]);
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
CalculateUrgentBurstFactor(
|
|
v->swath_width_luma_ub_this_state[k],
|
|
v->swath_width_chroma_ub_this_state[k],
|
|
v->SwathHeightYThisState[k],
|
|
v->SwathHeightCThisState[k],
|
|
v->HTotal[k] / v->PixelClock[k],
|
|
v->UrgLatency[i],
|
|
v->CursorBufferSize,
|
|
v->CursorWidth[k][0],
|
|
v->CursorBPP[k][0],
|
|
v->VRatio[k],
|
|
v->VRatioChroma[k],
|
|
v->BytePerPixelInDETY[k],
|
|
v->BytePerPixelInDETC[k],
|
|
v->DETBufferSizeYThisState[k],
|
|
v->DETBufferSizeCThisState[k],
|
|
&v->UrgentBurstFactorCursor[k],
|
|
&v->UrgentBurstFactorLuma[k],
|
|
&v->UrgentBurstFactorChroma[k],
|
|
&NotUrgentLatencyHiding[k]);
|
|
}
|
|
|
|
v->NotEnoughUrgentLatencyHidingA[i][j] = false;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (NotUrgentLatencyHiding[k]) {
|
|
v->NotEnoughUrgentLatencyHidingA[i][j] = true;
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
|
|
+ v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
|
|
v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
|
|
}
|
|
|
|
v->TotalVActivePixelBandwidth[i][j] = 0;
|
|
v->TotalVActiveCursorBandwidth[i][j] = 0;
|
|
v->TotalMetaRowBandwidth[i][j] = 0;
|
|
v->TotalDPTERowBandwidth[i][j] = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
|
|
v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
|
|
v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
|
|
v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
|
|
}
|
|
}
|
|
}
|
|
|
|
//Calculate Return BW
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
for (j = 0; j <= 1; ++j) {
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->BlendingAndTiming[k] == k) {
|
|
if (v->WritebackEnable[k] == true) {
|
|
v->WritebackDelayTime[k] = v->WritebackLatency
|
|
+ CalculateWriteBackDelay(
|
|
v->WritebackPixelFormat[k],
|
|
v->WritebackHRatio[k],
|
|
v->WritebackVRatio[k],
|
|
v->WritebackVTaps[k],
|
|
v->WritebackDestinationWidth[k],
|
|
v->WritebackDestinationHeight[k],
|
|
v->WritebackSourceHeight[k],
|
|
v->HTotal[k]) / v->RequiredDISPCLK[i][j];
|
|
} else {
|
|
v->WritebackDelayTime[k] = 0.0;
|
|
}
|
|
for (m = 0; m < v->NumberOfActivePlanes; m++) {
|
|
if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
|
|
v->WritebackDelayTime[k] = dml_max(
|
|
v->WritebackDelayTime[k],
|
|
v->WritebackLatency
|
|
+ CalculateWriteBackDelay(
|
|
v->WritebackPixelFormat[m],
|
|
v->WritebackHRatio[m],
|
|
v->WritebackVRatio[m],
|
|
v->WritebackVTaps[m],
|
|
v->WritebackDestinationWidth[m],
|
|
v->WritebackDestinationHeight[m],
|
|
v->WritebackSourceHeight[m],
|
|
v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
for (m = 0; m < v->NumberOfActivePlanes; m++) {
|
|
if (v->BlendingAndTiming[k] == m) {
|
|
v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
|
|
}
|
|
}
|
|
}
|
|
v->MaxMaxVStartup[i][j] = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->MaximumVStartup[i][j][k] =
|
|
(v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
|
|
dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
|
|
v->VTotal[k] - v->VActive[k]
|
|
- dml_max(
|
|
1.0,
|
|
dml_ceil(
|
|
1.0 * v->WritebackDelayTime[k]
|
|
/ (v->HTotal[k]
|
|
/ v->PixelClock[k]),
|
|
1.0));
|
|
if (v->MaximumVStartup[i][j][k] > 1023)
|
|
v->MaximumVStartup[i][j][k] = 1023;
|
|
v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
|
|
}
|
|
}
|
|
}
|
|
|
|
ReorderingBytes = v->NumberOfChannels
|
|
* dml_max3(
|
|
v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
|
|
v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
|
|
v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
|
|
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
for (j = 0; j <= 1; ++j) {
|
|
v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
|
|
}
|
|
}
|
|
|
|
if (v->UseMinimumRequiredDCFCLK == true)
|
|
UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
|
|
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
for (j = 0; j <= 1; ++j) {
|
|
double IdealFabricAndSDPPortBandwidthPerState = dml_min(
|
|
v->ReturnBusWidth * v->DCFCLKState[i][j],
|
|
v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
|
|
double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
|
|
double PixelDataOnlyReturnBWPerState = dml_min(
|
|
IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
|
|
IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
|
|
double PixelMixedWithVMDataReturnBWPerState = dml_min(
|
|
IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
|
|
IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
|
|
|
|
if (v->HostVMEnable != true) {
|
|
v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
|
|
} else {
|
|
v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
|
|
}
|
|
}
|
|
}
|
|
|
|
//Re-ordering Buffer Support Check
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
for (j = 0; j <= 1; ++j) {
|
|
if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
|
|
> (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
|
|
v->ROBSupport[i][j] = true;
|
|
} else {
|
|
v->ROBSupport[i][j] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
//Vertical Active BW support check
|
|
|
|
MaxTotalVActiveRDBandwidth = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
|
|
}
|
|
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
for (j = 0; j <= 1; ++j) {
|
|
v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
|
|
dml_min(
|
|
v->ReturnBusWidth * v->DCFCLKState[i][j],
|
|
v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
|
|
* v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
|
|
v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
|
|
* v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
|
|
|
|
if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
|
|
v->TotalVerticalActiveBandwidthSupport[i][j] = true;
|
|
} else {
|
|
v->TotalVerticalActiveBandwidthSupport[i][j] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
v->UrgentLatency = CalculateUrgentLatency(
|
|
v->UrgentLatencyPixelDataOnly,
|
|
v->UrgentLatencyPixelMixedWithVMData,
|
|
v->UrgentLatencyVMDataOnly,
|
|
v->DoUrgentLatencyAdjustment,
|
|
v->UrgentLatencyAdjustmentFabricClockComponent,
|
|
v->UrgentLatencyAdjustmentFabricClockReference,
|
|
v->FabricClock);
|
|
//Prefetch Check
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
for (j = 0; j <= 1; ++j) {
|
|
double VMDataOnlyReturnBWPerState;
|
|
double HostVMInefficiencyFactor = 1;
|
|
int NextPrefetchModeState = MinPrefetchMode;
|
|
bool UnboundedRequestEnabledThisState = false;
|
|
int CompressedBufferSizeInkByteThisState = 0;
|
|
double dummy;
|
|
|
|
v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
|
|
|
|
v->BandwidthWithoutPrefetchSupported[i][j] = true;
|
|
if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
|
|
+ v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
|
|
v->BandwidthWithoutPrefetchSupported[i][j] = false;
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
|
|
v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
|
|
v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
|
|
v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
|
|
v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
|
|
v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
|
|
v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
|
|
v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
|
|
v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
|
|
}
|
|
|
|
VMDataOnlyReturnBWPerState = dml_min(
|
|
dml_min(
|
|
v->ReturnBusWidth * v->DCFCLKState[i][j],
|
|
v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
|
|
* v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
|
|
v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
|
|
* v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
|
|
if (v->GPUVMEnable && v->HostVMEnable)
|
|
HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
|
|
|
|
v->ExtraLatency = CalculateExtraLatency(
|
|
v->RoundTripPingLatencyCycles,
|
|
ReorderingBytes,
|
|
v->DCFCLKState[i][j],
|
|
v->TotalNumberOfActiveDPP[i][j],
|
|
v->PixelChunkSizeInKByte,
|
|
v->TotalNumberOfDCCActiveDPP[i][j],
|
|
v->MetaChunkSize,
|
|
v->ReturnBWPerState[i][j],
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->NumberOfActivePlanes,
|
|
v->NoOfDPPThisState,
|
|
v->dpte_group_bytes,
|
|
HostVMInefficiencyFactor,
|
|
v->HostVMMinPageSize,
|
|
v->HostVMMaxNonCachedPageTableLevels);
|
|
|
|
v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
|
|
do {
|
|
v->PrefetchModePerState[i][j] = NextPrefetchModeState;
|
|
v->MaxVStartup = v->NextMaxVStartup;
|
|
|
|
v->TWait = CalculateTWait(
|
|
v->PrefetchModePerState[i][j],
|
|
v->DRAMClockChangeLatency,
|
|
v->UrgLatency[i],
|
|
v->SREnterPlusExitTime);
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
CalculatePrefetchSchedulePerPlane(mode_lib,
|
|
HostVMInefficiencyFactor,
|
|
i, j, k);
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
CalculateUrgentBurstFactor(
|
|
v->swath_width_luma_ub_this_state[k],
|
|
v->swath_width_chroma_ub_this_state[k],
|
|
v->SwathHeightYThisState[k],
|
|
v->SwathHeightCThisState[k],
|
|
v->HTotal[k] / v->PixelClock[k],
|
|
v->UrgentLatency,
|
|
v->CursorBufferSize,
|
|
v->CursorWidth[k][0],
|
|
v->CursorBPP[k][0],
|
|
v->VRatioPreY[i][j][k],
|
|
v->VRatioPreC[i][j][k],
|
|
v->BytePerPixelInDETY[k],
|
|
v->BytePerPixelInDETC[k],
|
|
v->DETBufferSizeYThisState[k],
|
|
v->DETBufferSizeCThisState[k],
|
|
&v->UrgentBurstFactorCursorPre[k],
|
|
&v->UrgentBurstFactorLumaPre[k],
|
|
&v->UrgentBurstFactorChromaPre[k],
|
|
&v->NotUrgentLatencyHidingPre[k]);
|
|
}
|
|
|
|
v->MaximumReadBandwidthWithPrefetch = 0.0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
|
|
/ (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
|
|
|
|
v->MaximumReadBandwidthWithPrefetch =
|
|
v->MaximumReadBandwidthWithPrefetch
|
|
+ dml_max3(
|
|
v->VActivePixelBandwidth[i][j][k]
|
|
+ v->VActiveCursorBandwidth[i][j][k]
|
|
+ v->NoOfDPP[i][j][k]
|
|
* (v->meta_row_bandwidth[i][j][k]
|
|
+ v->dpte_row_bandwidth[i][j][k]),
|
|
v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
|
|
v->NoOfDPP[i][j][k]
|
|
* (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
|
|
* v->UrgentBurstFactorLumaPre[k]
|
|
+ v->RequiredPrefetchPixelDataBWChroma[i][j][k]
|
|
* v->UrgentBurstFactorChromaPre[k])
|
|
+ v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
|
|
}
|
|
|
|
v->NotEnoughUrgentLatencyHidingPre = false;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->NotUrgentLatencyHidingPre[k] == true) {
|
|
v->NotEnoughUrgentLatencyHidingPre = true;
|
|
}
|
|
}
|
|
|
|
v->PrefetchSupported[i][j] = true;
|
|
if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
|
|
|| v->NotEnoughUrgentLatencyHidingPre == 1) {
|
|
v->PrefetchSupported[i][j] = false;
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
|
|
|| v->NoTimeForPrefetch[i][j][k] == true) {
|
|
v->PrefetchSupported[i][j] = false;
|
|
}
|
|
}
|
|
|
|
v->DynamicMetadataSupported[i][j] = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
|
|
v->DynamicMetadataSupported[i][j] = false;
|
|
}
|
|
}
|
|
|
|
v->VRatioInPrefetchSupported[i][j] = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
|
|
v->VRatioInPrefetchSupported[i][j] = false;
|
|
}
|
|
}
|
|
v->AnyLinesForVMOrRowTooLarge = false;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
|
|
v->AnyLinesForVMOrRowTooLarge = true;
|
|
}
|
|
}
|
|
|
|
v->NextPrefetchMode = v->NextPrefetchMode + 1;
|
|
|
|
if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
|
|
v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
|
|
- dml_max(
|
|
v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
|
|
v->NoOfDPP[i][j][k]
|
|
* (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
|
|
* v->UrgentBurstFactorLumaPre[k]
|
|
+ v->RequiredPrefetchPixelDataBWChroma[i][j][k]
|
|
* v->UrgentBurstFactorChromaPre[k])
|
|
+ v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
|
|
}
|
|
v->TotImmediateFlipBytes = 0.0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
|
|
+ v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
|
|
+ v->DPTEBytesPerRow[i][j][k];
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
CalculateFlipSchedule(
|
|
mode_lib,
|
|
k,
|
|
HostVMInefficiencyFactor,
|
|
v->ExtraLatency,
|
|
v->UrgLatency[i],
|
|
v->PDEAndMetaPTEBytesPerFrame[i][j][k],
|
|
v->MetaRowBytes[i][j][k],
|
|
v->DPTEBytesPerRow[i][j][k]);
|
|
}
|
|
v->total_dcn_read_bw_with_flip = 0.0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
|
|
+ dml_max3(
|
|
v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
|
|
v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
|
|
+ v->VActiveCursorBandwidth[i][j][k],
|
|
v->NoOfDPP[i][j][k]
|
|
* (v->final_flip_bw[k]
|
|
+ v->RequiredPrefetchPixelDataBWLuma[i][j][k]
|
|
* v->UrgentBurstFactorLumaPre[k]
|
|
+ v->RequiredPrefetchPixelDataBWChroma[i][j][k]
|
|
* v->UrgentBurstFactorChromaPre[k])
|
|
+ v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
|
|
}
|
|
v->ImmediateFlipSupportedForState[i][j] = true;
|
|
if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
|
|
v->ImmediateFlipSupportedForState[i][j] = false;
|
|
}
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->ImmediateFlipSupportedForPipe[k] == false) {
|
|
v->ImmediateFlipSupportedForState[i][j] = false;
|
|
}
|
|
}
|
|
} else {
|
|
v->ImmediateFlipSupportedForState[i][j] = false;
|
|
}
|
|
|
|
if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
|
|
v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
|
|
NextPrefetchModeState = NextPrefetchModeState + 1;
|
|
} else {
|
|
v->NextMaxVStartup = v->NextMaxVStartup - 1;
|
|
}
|
|
v->NextPrefetchMode = v->NextPrefetchMode + 1;
|
|
} while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
|
|
&& ((v->HostVMEnable == false &&
|
|
v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
|
|
|| v->ImmediateFlipSupportedForState[i][j] == true))
|
|
|| (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
|
|
|
|
CalculateUnboundedRequestAndCompressedBufferSize(
|
|
v->DETBufferSizeInKByte[0],
|
|
v->ConfigReturnBufferSizeInKByte,
|
|
v->UseUnboundedRequesting,
|
|
v->TotalNumberOfActiveDPP[i][j],
|
|
NoChroma,
|
|
v->MaxNumDPP,
|
|
v->CompressedBufferSegmentSizeInkByte,
|
|
v->Output,
|
|
&UnboundedRequestEnabledThisState,
|
|
&CompressedBufferSizeInkByteThisState);
|
|
|
|
CalculateWatermarksAndDRAMSpeedChangeSupport(
|
|
mode_lib,
|
|
v->PrefetchModePerState[i][j],
|
|
v->DCFCLKState[i][j],
|
|
v->ReturnBWPerState[i][j],
|
|
v->UrgLatency[i],
|
|
v->ExtraLatency,
|
|
v->SOCCLKPerState[i],
|
|
v->ProjectedDCFCLKDeepSleep[i][j],
|
|
v->DETBufferSizeYThisState,
|
|
v->DETBufferSizeCThisState,
|
|
v->SwathHeightYThisState,
|
|
v->SwathHeightCThisState,
|
|
v->SwathWidthYThisState,
|
|
v->SwathWidthCThisState,
|
|
v->NoOfDPPThisState,
|
|
v->BytePerPixelInDETY,
|
|
v->BytePerPixelInDETC,
|
|
UnboundedRequestEnabledThisState,
|
|
CompressedBufferSizeInkByteThisState,
|
|
&v->DRAMClockChangeSupport[i][j],
|
|
&dummy,
|
|
&dummy,
|
|
&dummy,
|
|
&dummy);
|
|
}
|
|
}
|
|
|
|
/*PTE Buffer Size Check*/
|
|
for (i = 0; i < v->soc.num_states; i++) {
|
|
for (j = 0; j < 2; j++) {
|
|
v->PTEBufferSizeNotExceeded[i][j] = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
|
|
v->PTEBufferSizeNotExceeded[i][j] = false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*Cursor Support Check*/
|
|
v->CursorSupport = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->CursorWidth[k][0] > 0.0) {
|
|
if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
|
|
v->CursorSupport = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*Valid Pitch Check*/
|
|
v->PitchSupport = true;
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
|
|
if (v->DCCEnable[k] == true) {
|
|
v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
|
|
} else {
|
|
v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
|
|
}
|
|
if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
|
|
&& v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
|
|
&& v->SourcePixelFormat[k] != dm_mono_8) {
|
|
v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
|
|
if (v->DCCEnable[k] == true) {
|
|
v->AlignedDCCMetaPitchC[k] = dml_ceil(
|
|
dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
|
|
64.0 * v->Read256BlockWidthC[k]);
|
|
} else {
|
|
v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
|
|
}
|
|
} else {
|
|
v->AlignedCPitch[k] = v->PitchC[k];
|
|
v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
|
|
}
|
|
if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
|
|
|| v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
|
|
v->PitchSupport = false;
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; k++) {
|
|
if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
|
|
ViewportExceedsSurface = true;
|
|
if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
|
|
&& v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
|
|
&& v->SourcePixelFormat[k] != dm_rgbe) {
|
|
if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
|
|
|| v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
|
|
ViewportExceedsSurface = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*Mode Support, Voltage State and SOC Configuration*/
|
|
for (i = v->soc.num_states - 1; i >= 0; i--) {
|
|
for (j = 0; j < 2; j++) {
|
|
if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
|
|
&& v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
|
|
&& !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
|
|
&& v->DTBCLKRequiredMoreThanSupported[i] == false
|
|
&& v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
|
|
&& v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
|
|
&& v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
|
|
&& v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
|
|
&& v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
|
|
&& v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
|
|
&& v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
|
|
&& ((v->HostVMEnable == false
|
|
&& v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
|
|
|| v->ImmediateFlipSupportedForState[i][j] == true)
|
|
&& FMTBufferExceeded == false) {
|
|
v->ModeSupport[i][j] = true;
|
|
} else {
|
|
v->ModeSupport[i][j] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
{
|
|
unsigned int MaximumMPCCombine = 0;
|
|
for (i = v->soc.num_states; i >= 0; i--) {
|
|
if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
|
|
v->VoltageLevel = i;
|
|
v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
|
|
if (v->ModeSupport[i][0] == true) {
|
|
MaximumMPCCombine = 0;
|
|
} else {
|
|
MaximumMPCCombine = 1;
|
|
}
|
|
}
|
|
}
|
|
v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
|
|
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
|
|
v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
|
|
v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
|
|
}
|
|
v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
|
|
v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
|
|
v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
|
|
v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
|
|
v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
|
|
v->maxMpcComb = MaximumMPCCombine;
|
|
}
|
|
}
|
|
|
|
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
|
|
struct display_mode_lib *mode_lib,
|
|
unsigned int PrefetchMode,
|
|
double DCFCLK,
|
|
double ReturnBW,
|
|
double UrgentLatency,
|
|
double ExtraLatency,
|
|
double SOCCLK,
|
|
double DCFCLKDeepSleep,
|
|
unsigned int DETBufferSizeY[],
|
|
unsigned int DETBufferSizeC[],
|
|
unsigned int SwathHeightY[],
|
|
unsigned int SwathHeightC[],
|
|
double SwathWidthY[],
|
|
double SwathWidthC[],
|
|
unsigned int DPPPerPlane[],
|
|
double BytePerPixelDETY[],
|
|
double BytePerPixelDETC[],
|
|
bool UnboundedRequestEnabled,
|
|
int unsigned CompressedBufferSizeInkByte,
|
|
enum clock_change_support *DRAMClockChangeSupport,
|
|
double *StutterExitWatermark,
|
|
double *StutterEnterPlusExitWatermark,
|
|
double *Z8StutterExitWatermark,
|
|
double *Z8StutterEnterPlusExitWatermark)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
double EffectiveLBLatencyHidingY;
|
|
double EffectiveLBLatencyHidingC;
|
|
double LinesInDETY[DC__NUM_DPP__MAX];
|
|
double LinesInDETC;
|
|
unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
|
|
unsigned int LinesInDETCRoundedDownToSwath;
|
|
double FullDETBufferingTimeY;
|
|
double FullDETBufferingTimeC;
|
|
double ActiveDRAMClockChangeLatencyMarginY;
|
|
double ActiveDRAMClockChangeLatencyMarginC;
|
|
double WritebackDRAMClockChangeLatencyMargin;
|
|
double PlaneWithMinActiveDRAMClockChangeMargin;
|
|
double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
|
|
double WritebackDRAMClockChangeLatencyHiding;
|
|
double TotalPixelBW = 0.0;
|
|
int k, j;
|
|
|
|
v->UrgentWatermark = UrgentLatency + ExtraLatency;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
|
|
dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
|
|
dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
|
|
#endif
|
|
|
|
v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
|
|
dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
|
|
#endif
|
|
|
|
v->TotalActiveWriteback = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->WritebackEnable[k] == true) {
|
|
v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
|
|
}
|
|
}
|
|
|
|
if (v->TotalActiveWriteback <= 1) {
|
|
v->WritebackUrgentWatermark = v->WritebackLatency;
|
|
} else {
|
|
v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
|
|
}
|
|
|
|
if (v->TotalActiveWriteback <= 1) {
|
|
v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
|
|
} else {
|
|
v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
TotalPixelBW = TotalPixelBW
|
|
+ DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
|
|
/ (v->HTotal[k] / v->PixelClock[k]);
|
|
}
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
double EffectiveDETBufferSizeY = DETBufferSizeY[k];
|
|
|
|
v->LBLatencyHidingSourceLinesY = dml_min(
|
|
(double) v->MaxLineBufferLines,
|
|
dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
|
|
|
|
v->LBLatencyHidingSourceLinesC = dml_min(
|
|
(double) v->MaxLineBufferLines,
|
|
dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
|
|
|
|
EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
|
|
|
|
EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
|
|
|
|
if (UnboundedRequestEnabled) {
|
|
EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
|
|
+ CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
|
|
}
|
|
|
|
LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
|
|
LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
|
|
FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
|
|
if (BytePerPixelDETC[k] > 0) {
|
|
LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
|
|
LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
|
|
FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
|
|
} else {
|
|
LinesInDETC = 0;
|
|
FullDETBufferingTimeC = 999999;
|
|
}
|
|
|
|
ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
|
|
- ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
|
|
|
|
if (v->NumberOfActivePlanes > 1) {
|
|
ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
|
|
- (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
|
|
}
|
|
|
|
if (BytePerPixelDETC[k] > 0) {
|
|
ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
|
|
- ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
|
|
|
|
if (v->NumberOfActivePlanes > 1) {
|
|
ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
|
|
- (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
|
|
}
|
|
v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
|
|
} else {
|
|
v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
|
|
}
|
|
|
|
if (v->WritebackEnable[k] == true) {
|
|
WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
|
|
/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
|
|
if (v->WritebackPixelFormat[k] == dm_444_64) {
|
|
WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
|
|
}
|
|
WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
|
|
v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
|
|
}
|
|
}
|
|
|
|
v->MinActiveDRAMClockChangeMargin = 999999;
|
|
PlaneWithMinActiveDRAMClockChangeMargin = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
|
|
v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
|
|
if (v->BlendingAndTiming[k] == k) {
|
|
PlaneWithMinActiveDRAMClockChangeMargin = k;
|
|
} else {
|
|
for (j = 0; j < v->NumberOfActivePlanes; ++j) {
|
|
if (v->BlendingAndTiming[k] == j) {
|
|
PlaneWithMinActiveDRAMClockChangeMargin = j;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
|
|
|
|
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
|
|
&& v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
|
|
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
|
|
}
|
|
}
|
|
|
|
v->TotalNumberOfActiveOTG = 0;
|
|
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
if (v->BlendingAndTiming[k] == k) {
|
|
v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
|
|
}
|
|
}
|
|
|
|
if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
|
|
*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
|
|
} else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
|
|
|| SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
|
|
*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
|
|
} else {
|
|
*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
|
|
}
|
|
|
|
*StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
|
|
*StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
|
|
*Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
|
|
*Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
|
|
dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
|
|
dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
|
|
dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
|
|
#endif
|
|
}
|
|
|
|
static void CalculateDCFCLKDeepSleep(
|
|
struct display_mode_lib *mode_lib,
|
|
unsigned int NumberOfActivePlanes,
|
|
int BytePerPixelY[],
|
|
int BytePerPixelC[],
|
|
double VRatio[],
|
|
double VRatioChroma[],
|
|
double SwathWidthY[],
|
|
double SwathWidthC[],
|
|
unsigned int DPPPerPlane[],
|
|
double HRatio[],
|
|
double HRatioChroma[],
|
|
double PixelClock[],
|
|
double PSCL_THROUGHPUT[],
|
|
double PSCL_THROUGHPUT_CHROMA[],
|
|
double DPPCLK[],
|
|
double ReadBandwidthLuma[],
|
|
double ReadBandwidthChroma[],
|
|
int ReturnBusWidth,
|
|
double *DCFCLKDeepSleep)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
double DisplayPipeLineDeliveryTimeLuma;
|
|
double DisplayPipeLineDeliveryTimeChroma;
|
|
double ReadBandwidth = 0.0;
|
|
int k;
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
|
|
if (VRatio[k] <= 1) {
|
|
DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
|
|
} else {
|
|
DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
|
|
}
|
|
if (BytePerPixelC[k] == 0) {
|
|
DisplayPipeLineDeliveryTimeChroma = 0;
|
|
} else {
|
|
if (VRatioChroma[k] <= 1) {
|
|
DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
|
|
} else {
|
|
DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
|
|
}
|
|
}
|
|
|
|
if (BytePerPixelC[k] > 0) {
|
|
v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
|
|
__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
|
|
} else {
|
|
v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
|
|
}
|
|
v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
|
|
|
|
}
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
|
|
}
|
|
|
|
*DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
*DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
|
|
}
|
|
}
|
|
|
|
/*
 * CalculateUrgentBurstFactor - compute the urgent burst factors for the
 * cursor, luma and chroma buffers of one plane.
 *
 * For each buffer the time it covers is derived from its size in lines,
 * LineTime and the vertical ratio.  If that time does not exceed
 * UrgentLatency the factor is set to 0 and *NotEnoughUrgentLatencyHiding is
 * set; otherwise the factor is time / (time - UrgentLatency).  A vertical
 * ratio that is not greater than 0 yields a factor of 1.
 *
 * *NotEnoughUrgentLatencyHiding is always written (cleared first).
 * *UrgentBurstFactorLuma is always written.
 * *UrgentBurstFactorCursor is written only when CursorWidth > 0.
 * *UrgentBurstFactorChroma is written only when BytePerPixelInDETC > 0.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = false;

	if (CursorWidth > 0) {
		/* Cursor lines that fit in the buffer, rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards the buffered time. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * Chroma lines are consumed at the chroma vertical ratio, so
		 * the chroma buffer time must be derived from VRatioC rather
		 * than the luma VRatio.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
|
|
|
|
static void CalculatePixelDeliveryTimes(
|
|
unsigned int NumberOfActivePlanes,
|
|
double VRatio[],
|
|
double VRatioChroma[],
|
|
double VRatioPrefetchY[],
|
|
double VRatioPrefetchC[],
|
|
unsigned int swath_width_luma_ub[],
|
|
unsigned int swath_width_chroma_ub[],
|
|
unsigned int DPPPerPlane[],
|
|
double HRatio[],
|
|
double HRatioChroma[],
|
|
double PixelClock[],
|
|
double PSCL_THROUGHPUT[],
|
|
double PSCL_THROUGHPUT_CHROMA[],
|
|
double DPPCLK[],
|
|
int BytePerPixelC[],
|
|
enum scan_direction_class SourceScan[],
|
|
unsigned int NumberOfCursors[],
|
|
unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
|
|
unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
|
|
unsigned int BlockWidth256BytesY[],
|
|
unsigned int BlockHeight256BytesY[],
|
|
unsigned int BlockWidth256BytesC[],
|
|
unsigned int BlockHeight256BytesC[],
|
|
double DisplayPipeLineDeliveryTimeLuma[],
|
|
double DisplayPipeLineDeliveryTimeChroma[],
|
|
double DisplayPipeLineDeliveryTimeLumaPrefetch[],
|
|
double DisplayPipeLineDeliveryTimeChromaPrefetch[],
|
|
double DisplayPipeRequestDeliveryTimeLuma[],
|
|
double DisplayPipeRequestDeliveryTimeChroma[],
|
|
double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
|
|
double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
|
|
double CursorRequestDeliveryTime[],
|
|
double CursorRequestDeliveryTimePrefetch[])
|
|
{
|
|
double req_per_swath_ub;
|
|
int k;
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
if (VRatio[k] <= 1) {
|
|
DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
|
|
} else {
|
|
DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
|
|
}
|
|
|
|
if (BytePerPixelC[k] == 0) {
|
|
DisplayPipeLineDeliveryTimeChroma[k] = 0;
|
|
} else {
|
|
if (VRatioChroma[k] <= 1) {
|
|
DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
|
|
} else {
|
|
DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
|
|
}
|
|
}
|
|
|
|
if (VRatioPrefetchY[k] <= 1) {
|
|
DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
|
|
} else {
|
|
DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
|
|
}
|
|
|
|
if (BytePerPixelC[k] == 0) {
|
|
DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
|
|
} else {
|
|
if (VRatioPrefetchC[k] <= 1) {
|
|
DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
|
|
} else {
|
|
DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
|
|
}
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
if (SourceScan[k] != dm_vert) {
|
|
req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
|
|
} else {
|
|
req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
|
|
}
|
|
DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
|
|
DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
|
|
if (BytePerPixelC[k] == 0) {
|
|
DisplayPipeRequestDeliveryTimeChroma[k] = 0;
|
|
DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
|
|
} else {
|
|
if (SourceScan[k] != dm_vert) {
|
|
req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
|
|
} else {
|
|
req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
|
|
}
|
|
DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
|
|
DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
|
|
}
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
|
|
dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
|
|
dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
|
|
dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
|
|
dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
|
|
dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
|
|
dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
|
|
dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
|
|
dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
|
|
dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
|
|
dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
|
|
dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
|
|
#endif
|
|
}
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
int cursor_req_per_width;
|
|
cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
|
|
if (NumberOfCursors[k] > 0) {
|
|
if (VRatio[k] <= 1) {
|
|
CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
|
|
} else {
|
|
CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
|
|
}
|
|
if (VRatioPrefetchY[k] <= 1) {
|
|
CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
|
|
} else {
|
|
CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
|
|
}
|
|
} else {
|
|
CursorRequestDeliveryTime[k] = 0;
|
|
CursorRequestDeliveryTimePrefetch[k] = 0;
|
|
}
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
|
|
dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
|
|
dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
static void CalculateMetaAndPTETimes(
|
|
int NumberOfActivePlanes,
|
|
bool GPUVMEnable,
|
|
int MetaChunkSize,
|
|
int MinMetaChunkSizeBytes,
|
|
int HTotal[],
|
|
double VRatio[],
|
|
double VRatioChroma[],
|
|
double DestinationLinesToRequestRowInVBlank[],
|
|
double DestinationLinesToRequestRowInImmediateFlip[],
|
|
bool DCCEnable[],
|
|
double PixelClock[],
|
|
int BytePerPixelY[],
|
|
int BytePerPixelC[],
|
|
enum scan_direction_class SourceScan[],
|
|
int dpte_row_height[],
|
|
int dpte_row_height_chroma[],
|
|
int meta_row_width[],
|
|
int meta_row_width_chroma[],
|
|
int meta_row_height[],
|
|
int meta_row_height_chroma[],
|
|
int meta_req_width[],
|
|
int meta_req_width_chroma[],
|
|
int meta_req_height[],
|
|
int meta_req_height_chroma[],
|
|
int dpte_group_bytes[],
|
|
int PTERequestSizeY[],
|
|
int PTERequestSizeC[],
|
|
int PixelPTEReqWidthY[],
|
|
int PixelPTEReqHeightY[],
|
|
int PixelPTEReqWidthC[],
|
|
int PixelPTEReqHeightC[],
|
|
int dpte_row_width_luma_ub[],
|
|
int dpte_row_width_chroma_ub[],
|
|
double DST_Y_PER_PTE_ROW_NOM_L[],
|
|
double DST_Y_PER_PTE_ROW_NOM_C[],
|
|
double DST_Y_PER_META_ROW_NOM_L[],
|
|
double DST_Y_PER_META_ROW_NOM_C[],
|
|
double TimePerMetaChunkNominal[],
|
|
double TimePerChromaMetaChunkNominal[],
|
|
double TimePerMetaChunkVBlank[],
|
|
double TimePerChromaMetaChunkVBlank[],
|
|
double TimePerMetaChunkFlip[],
|
|
double TimePerChromaMetaChunkFlip[],
|
|
double time_per_pte_group_nom_luma[],
|
|
double time_per_pte_group_vblank_luma[],
|
|
double time_per_pte_group_flip_luma[],
|
|
double time_per_pte_group_nom_chroma[],
|
|
double time_per_pte_group_vblank_chroma[],
|
|
double time_per_pte_group_flip_chroma[])
|
|
{
|
|
unsigned int meta_chunk_width;
|
|
unsigned int min_meta_chunk_width;
|
|
unsigned int meta_chunk_per_row_int;
|
|
unsigned int meta_row_remainder;
|
|
unsigned int meta_chunk_threshold;
|
|
unsigned int meta_chunks_per_row_ub;
|
|
unsigned int meta_chunk_width_chroma;
|
|
unsigned int min_meta_chunk_width_chroma;
|
|
unsigned int meta_chunk_per_row_int_chroma;
|
|
unsigned int meta_row_remainder_chroma;
|
|
unsigned int meta_chunk_threshold_chroma;
|
|
unsigned int meta_chunks_per_row_ub_chroma;
|
|
unsigned int dpte_group_width_luma;
|
|
unsigned int dpte_groups_per_row_luma_ub;
|
|
unsigned int dpte_group_width_chroma;
|
|
unsigned int dpte_groups_per_row_chroma_ub;
|
|
int k;
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
|
|
if (BytePerPixelC[k] == 0) {
|
|
DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
|
|
} else {
|
|
DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
|
|
}
|
|
DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
|
|
if (BytePerPixelC[k] == 0) {
|
|
DST_Y_PER_META_ROW_NOM_C[k] = 0;
|
|
} else {
|
|
DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
if (DCCEnable[k] == true) {
|
|
meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
|
|
min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
|
|
meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
|
|
meta_row_remainder = meta_row_width[k] % meta_chunk_width;
|
|
if (SourceScan[k] != dm_vert) {
|
|
meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
|
|
} else {
|
|
meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
|
|
}
|
|
if (meta_row_remainder <= meta_chunk_threshold) {
|
|
meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
|
|
} else {
|
|
meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
|
|
}
|
|
TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
|
|
TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
|
|
TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
|
|
if (BytePerPixelC[k] == 0) {
|
|
TimePerChromaMetaChunkNominal[k] = 0;
|
|
TimePerChromaMetaChunkVBlank[k] = 0;
|
|
TimePerChromaMetaChunkFlip[k] = 0;
|
|
} else {
|
|
meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
|
|
min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
|
|
meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
|
|
meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
|
|
if (SourceScan[k] != dm_vert) {
|
|
meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
|
|
} else {
|
|
meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
|
|
}
|
|
if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
|
|
meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
|
|
} else {
|
|
meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
|
|
}
|
|
TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
|
|
TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
|
|
TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
|
|
}
|
|
} else {
|
|
TimePerMetaChunkNominal[k] = 0;
|
|
TimePerMetaChunkVBlank[k] = 0;
|
|
TimePerMetaChunkFlip[k] = 0;
|
|
TimePerChromaMetaChunkNominal[k] = 0;
|
|
TimePerChromaMetaChunkVBlank[k] = 0;
|
|
TimePerChromaMetaChunkFlip[k] = 0;
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
if (GPUVMEnable == true) {
|
|
if (SourceScan[k] != dm_vert) {
|
|
dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
|
|
} else {
|
|
dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
|
|
}
|
|
dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
|
|
time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
|
|
time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
|
|
time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
|
|
if (BytePerPixelC[k] == 0) {
|
|
time_per_pte_group_nom_chroma[k] = 0;
|
|
time_per_pte_group_vblank_chroma[k] = 0;
|
|
time_per_pte_group_flip_chroma[k] = 0;
|
|
} else {
|
|
if (SourceScan[k] != dm_vert) {
|
|
dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
|
|
} else {
|
|
dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
|
|
}
|
|
dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
|
|
time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
|
|
time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
|
|
time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
|
|
}
|
|
} else {
|
|
time_per_pte_group_nom_luma[k] = 0;
|
|
time_per_pte_group_vblank_luma[k] = 0;
|
|
time_per_pte_group_flip_luma[k] = 0;
|
|
time_per_pte_group_nom_chroma[k] = 0;
|
|
time_per_pte_group_vblank_chroma[k] = 0;
|
|
time_per_pte_group_flip_chroma[k] = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * CalculateVMGroupAndRequestTimes - fill the per-plane VM group / VM request
 * time arrays for planes 0 .. NumberOfActivePlanes - 1.
 *
 * A plane gets non-zero results only when GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1; otherwise all four
 * outputs for that plane are 0.
 *
 * For such a plane two counts are built from the dpde0 and/or meta PTE
 * bytes per frame (chroma terms only when BytePerPixelC[k] > 0):
 *   - groups:   sum of ceil(bytes / vm_group_bytes[k]) over the counted terms
 *   - requests: sum of bytes / 64 (integer division) over the counted terms
 * Which terms are counted:
 *   - DCC off:                          dpde0 only
 *   - DCC on, exactly 1 table level:    meta PTE only
 *   - DCC on, any other level count:    both, and the group count gets an
 *                                       extra 1 (2 when chroma is present)
 *
 * Each output is DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] *
 * HTotal[k] / PixelClock[k] divided by the relevant count, and halved again
 * when GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		bool has_chroma;
		bool count_dpde;
		bool count_meta;
		double group_total;
		unsigned int request_total;
		int groups;
		int requests;
		double vblank_vm_time;
		double flip_vm_time;

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;
		count_meta = DCCEnable[k];
		count_dpde = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;

		/* Extra groups are added only when both kinds of bytes are counted. */
		group_total = (count_dpde && count_meta) ? (has_chroma ? 2 : 1) : 0;
		request_total = 0;

		if (count_dpde) {
			group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			request_total += dpde0_bytes_per_frame_ub_l[k] / 64;
			if (has_chroma) {
				group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_total += dpde0_bytes_per_frame_ub_c[k] / 64;
			}
		}

		if (count_meta) {
			group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			request_total += meta_pte_bytes_per_frame_ub_l[k] / 64;
			if (has_chroma) {
				group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				request_total += meta_pte_bytes_per_frame_ub_c[k] / 64;
			}
		}

		/* The divisors are plain ints, exactly as in the original. */
		groups = group_total;
		requests = request_total;

		vblank_vm_time = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_vm_time = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = vblank_vm_time / groups;
		TimePerVMGroupFlip[k] = flip_vm_time / groups;
		TimePerVMRequestVBlank[k] = vblank_vm_time / requests;
		TimePerVMRequestFlip[k] = flip_vm_time / requests;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
			TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
			TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
			TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
		}
	}
}
|
|
|
|
static void CalculateStutterEfficiency(
|
|
struct display_mode_lib *mode_lib,
|
|
int CompressedBufferSizeInkByte,
|
|
bool UnboundedRequestEnabled,
|
|
int ConfigReturnBufferSizeInKByte,
|
|
int MetaFIFOSizeInKEntries,
|
|
int ZeroSizeBufferEntries,
|
|
int NumberOfActivePlanes,
|
|
int ROBBufferSizeInKByte,
|
|
double TotalDataReadBandwidth,
|
|
double DCFCLK,
|
|
double ReturnBW,
|
|
double COMPBUF_RESERVED_SPACE_64B,
|
|
double COMPBUF_RESERVED_SPACE_ZS,
|
|
double SRExitTime,
|
|
double SRExitZ8Time,
|
|
bool SynchronizedVBlank,
|
|
double Z8StutterEnterPlusExitWatermark,
|
|
double StutterEnterPlusExitWatermark,
|
|
bool ProgressiveToInterlaceUnitInOPP,
|
|
bool Interlace[],
|
|
double MinTTUVBlank[],
|
|
int DPPPerPlane[],
|
|
unsigned int DETBufferSizeY[],
|
|
int BytePerPixelY[],
|
|
double BytePerPixelDETY[],
|
|
double SwathWidthY[],
|
|
int SwathHeightY[],
|
|
int SwathHeightC[],
|
|
double NetDCCRateLuma[],
|
|
double NetDCCRateChroma[],
|
|
double DCCFractionOfZeroSizeRequestsLuma[],
|
|
double DCCFractionOfZeroSizeRequestsChroma[],
|
|
int HTotal[],
|
|
int VTotal[],
|
|
double PixelClock[],
|
|
double VRatio[],
|
|
enum scan_direction_class SourceScan[],
|
|
int BlockHeight256BytesY[],
|
|
int BlockWidth256BytesY[],
|
|
int BlockHeight256BytesC[],
|
|
int BlockWidth256BytesC[],
|
|
int DCCYMaxUncompressedBlock[],
|
|
int DCCCMaxUncompressedBlock[],
|
|
int VActive[],
|
|
bool DCCEnable[],
|
|
bool WritebackEnable[],
|
|
double ReadBandwidthPlaneLuma[],
|
|
double ReadBandwidthPlaneChroma[],
|
|
double meta_row_bw[],
|
|
double dpte_row_bw[],
|
|
double *StutterEfficiencyNotIncludingVBlank,
|
|
double *StutterEfficiency,
|
|
int *NumberOfStutterBurstsPerFrame,
|
|
double *Z8StutterEfficiencyNotIncludingVBlank,
|
|
double *Z8StutterEfficiency,
|
|
int *Z8NumberOfStutterBurstsPerFrame,
|
|
double *StutterPeriod)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
|
|
double DETBufferingTimeY;
|
|
double SwathWidthYCriticalPlane = 0;
|
|
double VActiveTimeCriticalPlane = 0;
|
|
double FrameTimeCriticalPlane = 0;
|
|
int BytePerPixelYCriticalPlane = 0;
|
|
double LinesToFinishSwathTransferStutterCriticalPlane = 0;
|
|
double MinTTUVBlankCriticalPlane = 0;
|
|
double TotalCompressedReadBandwidth;
|
|
double TotalRowReadBandwidth;
|
|
double AverageDCCCompressionRate;
|
|
double EffectiveCompressedBufferSize;
|
|
double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
|
|
double StutterBurstTime;
|
|
int TotalActiveWriteback;
|
|
double LinesInDETY;
|
|
double LinesInDETYRoundedDownToSwath;
|
|
double MaximumEffectiveCompressionLuma;
|
|
double MaximumEffectiveCompressionChroma;
|
|
double TotalZeroSizeRequestReadBandwidth;
|
|
double TotalZeroSizeCompressedReadBandwidth;
|
|
double AverageDCCZeroSizeFraction;
|
|
double AverageZeroSizeCompressionRate;
|
|
int TotalNumberOfActiveOTG = 0;
|
|
double LastStutterPeriod = 0.0;
|
|
double LastZ8StutterPeriod = 0.0;
|
|
int k;
|
|
|
|
TotalZeroSizeRequestReadBandwidth = 0;
|
|
TotalZeroSizeCompressedReadBandwidth = 0;
|
|
TotalRowReadBandwidth = 0;
|
|
TotalCompressedReadBandwidth = 0;
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
if (DCCEnable[k] == true) {
|
|
if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
|
|
|| DCCYMaxUncompressedBlock[k] < 256) {
|
|
MaximumEffectiveCompressionLuma = 2;
|
|
} else {
|
|
MaximumEffectiveCompressionLuma = 4;
|
|
}
|
|
TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
|
|
TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
|
|
TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
|
|
+ ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
|
|
if (ReadBandwidthPlaneChroma[k] > 0) {
|
|
if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
|
|
|| (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
|
|
MaximumEffectiveCompressionChroma = 2;
|
|
} else {
|
|
MaximumEffectiveCompressionChroma = 4;
|
|
}
|
|
TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
|
|
+ ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
|
|
TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
|
|
TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
|
|
+ ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
|
|
}
|
|
} else {
|
|
TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
|
|
}
|
|
TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
|
|
}
|
|
|
|
AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
|
|
AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
|
|
dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
|
|
dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
|
|
dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
|
|
dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
|
|
dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
|
|
dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
|
|
dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
|
|
#endif
|
|
|
|
if (AverageDCCZeroSizeFraction == 1) {
|
|
AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
|
|
EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
|
|
} else if (AverageDCCZeroSizeFraction > 0) {
|
|
AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
|
|
EffectiveCompressedBufferSize = dml_min(
|
|
CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
|
|
MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
|
|
+ dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
|
|
(ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
|
|
dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
|
|
dml_print(
|
|
"DML::%s: min 2 = %f\n",
|
|
__func__,
|
|
MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
|
|
dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
|
|
dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
|
|
} else {
|
|
EffectiveCompressedBufferSize = dml_min(
|
|
CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
|
|
MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
|
|
dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
|
|
dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
|
|
dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
|
|
dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
|
|
#endif
|
|
|
|
*StutterPeriod = 0;
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
|
|
/ BytePerPixelDETY[k] / SwathWidthY[k];
|
|
LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
|
|
DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
|
|
dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
|
|
dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
|
|
dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
|
|
dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
|
|
dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
|
|
dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
|
|
dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
|
|
dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
|
|
dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
|
|
dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
|
|
dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
|
|
#endif
|
|
|
|
if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
|
|
bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
|
|
|
|
*StutterPeriod = DETBufferingTimeY;
|
|
FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
|
|
VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
|
|
BytePerPixelYCriticalPlane = BytePerPixelY[k];
|
|
SwathWidthYCriticalPlane = SwathWidthY[k];
|
|
LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
|
|
MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
|
|
dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
|
|
dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
|
|
dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
|
|
dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
|
|
dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
|
|
dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
|
|
dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
|
|
dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
|
|
dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
|
|
dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
|
|
dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
|
|
dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
|
|
dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
|
|
dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
|
|
dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
|
|
#endif
|
|
|
|
StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
|
|
+ (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
|
|
+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
|
|
dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
|
|
dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
|
|
dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
|
|
dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
|
|
#endif
|
|
StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
|
|
|
|
dml_print(
|
|
"DML::%s: Time to finish residue swath=%f\n",
|
|
__func__,
|
|
LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
|
|
|
|
TotalActiveWriteback = 0;
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
if (WritebackEnable[k]) {
|
|
TotalActiveWriteback = TotalActiveWriteback + 1;
|
|
}
|
|
}
|
|
|
|
if (TotalActiveWriteback == 0) {
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
|
|
dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
|
|
dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
|
|
dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
|
|
#endif
|
|
*StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
|
|
*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
|
|
*NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
|
|
*Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
|
|
} else {
|
|
*StutterEfficiencyNotIncludingVBlank = 0.;
|
|
*Z8StutterEfficiencyNotIncludingVBlank = 0.;
|
|
*NumberOfStutterBurstsPerFrame = 0;
|
|
*Z8NumberOfStutterBurstsPerFrame = 0;
|
|
}
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
|
|
dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
|
|
dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
|
|
dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
|
|
dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
|
|
#endif
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
if (v->BlendingAndTiming[k] == k) {
|
|
TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
|
|
}
|
|
}
|
|
|
|
if (*StutterEfficiencyNotIncludingVBlank > 0) {
|
|
LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
|
|
|
|
if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
|
|
*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
|
|
/ *StutterPeriod) / FrameTimeCriticalPlane) * 100;
|
|
} else {
|
|
*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
|
|
}
|
|
} else {
|
|
*StutterEfficiency = 0;
|
|
}
|
|
|
|
if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
|
|
LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
|
|
if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
|
|
*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
|
|
/ *StutterPeriod) / FrameTimeCriticalPlane) * 100;
|
|
} else {
|
|
*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
|
|
}
|
|
} else {
|
|
*Z8StutterEfficiency = 0.;
|
|
}
|
|
|
|
dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
|
|
dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
|
|
dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
|
|
dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
|
|
dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
|
|
dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
|
|
dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
|
|
dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
|
|
}
|
|
|
|
static void CalculateSwathAndDETConfiguration(
|
|
bool ForceSingleDPP,
|
|
int NumberOfActivePlanes,
|
|
bool DETSharedByAllDPP,
|
|
unsigned int DETBufferSizeInKByteA[],
|
|
double MaximumSwathWidthLuma[],
|
|
double MaximumSwathWidthChroma[],
|
|
enum scan_direction_class SourceScan[],
|
|
enum source_format_class SourcePixelFormat[],
|
|
enum dm_swizzle_mode SurfaceTiling[],
|
|
int ViewportWidth[],
|
|
int ViewportHeight[],
|
|
int SurfaceWidthY[],
|
|
int SurfaceWidthC[],
|
|
int SurfaceHeightY[],
|
|
int SurfaceHeightC[],
|
|
int Read256BytesBlockHeightY[],
|
|
int Read256BytesBlockHeightC[],
|
|
int Read256BytesBlockWidthY[],
|
|
int Read256BytesBlockWidthC[],
|
|
enum odm_combine_mode ODMCombineEnabled[],
|
|
int BlendingAndTiming[],
|
|
int BytePerPixY[],
|
|
int BytePerPixC[],
|
|
double BytePerPixDETY[],
|
|
double BytePerPixDETC[],
|
|
int HActive[],
|
|
double HRatio[],
|
|
double HRatioChroma[],
|
|
int DPPPerPlane[],
|
|
int swath_width_luma_ub[],
|
|
int swath_width_chroma_ub[],
|
|
double SwathWidth[],
|
|
double SwathWidthChroma[],
|
|
int SwathHeightY[],
|
|
int SwathHeightC[],
|
|
unsigned int DETBufferSizeY[],
|
|
unsigned int DETBufferSizeC[],
|
|
bool ViewportSizeSupportPerPlane[],
|
|
bool *ViewportSizeSupport)
|
|
{
|
|
int MaximumSwathHeightY[DC__NUM_DPP__MAX];
|
|
int MaximumSwathHeightC[DC__NUM_DPP__MAX];
|
|
int MinimumSwathHeightY;
|
|
int MinimumSwathHeightC;
|
|
int RoundedUpMaxSwathSizeBytesY;
|
|
int RoundedUpMaxSwathSizeBytesC;
|
|
int RoundedUpMinSwathSizeBytesY;
|
|
int RoundedUpMinSwathSizeBytesC;
|
|
int RoundedUpSwathSizeBytesY;
|
|
int RoundedUpSwathSizeBytesC;
|
|
double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
|
|
double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
|
|
int k;
|
|
|
|
CalculateSwathWidth(
|
|
ForceSingleDPP,
|
|
NumberOfActivePlanes,
|
|
SourcePixelFormat,
|
|
SourceScan,
|
|
ViewportWidth,
|
|
ViewportHeight,
|
|
SurfaceWidthY,
|
|
SurfaceWidthC,
|
|
SurfaceHeightY,
|
|
SurfaceHeightC,
|
|
ODMCombineEnabled,
|
|
BytePerPixY,
|
|
BytePerPixC,
|
|
Read256BytesBlockHeightY,
|
|
Read256BytesBlockHeightC,
|
|
Read256BytesBlockWidthY,
|
|
Read256BytesBlockWidthC,
|
|
BlendingAndTiming,
|
|
HActive,
|
|
HRatio,
|
|
DPPPerPlane,
|
|
SwathWidthSingleDPP,
|
|
SwathWidthSingleDPPChroma,
|
|
SwathWidth,
|
|
SwathWidthChroma,
|
|
MaximumSwathHeightY,
|
|
MaximumSwathHeightC,
|
|
swath_width_luma_ub,
|
|
swath_width_chroma_ub);
|
|
|
|
*ViewportSizeSupport = true;
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];
|
|
|
|
if (DETSharedByAllDPP && DPPPerPlane[k])
|
|
DETBufferSizeInKByte /= DPPPerPlane[k];
|
|
if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
|
|
|| SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
|
|
if (SurfaceTiling[k] == dm_sw_linear
|
|
|| (SourcePixelFormat[k] == dm_444_64
|
|
&& (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
|
|
&& SourceScan[k] != dm_vert)) {
|
|
MinimumSwathHeightY = MaximumSwathHeightY[k];
|
|
} else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
|
|
MinimumSwathHeightY = MaximumSwathHeightY[k];
|
|
} else {
|
|
MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
|
|
}
|
|
MinimumSwathHeightC = MaximumSwathHeightC[k];
|
|
} else {
|
|
if (SurfaceTiling[k] == dm_sw_linear) {
|
|
MinimumSwathHeightY = MaximumSwathHeightY[k];
|
|
MinimumSwathHeightC = MaximumSwathHeightC[k];
|
|
} else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
|
|
MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
|
|
MinimumSwathHeightC = MaximumSwathHeightC[k];
|
|
} else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
|
|
MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
|
|
MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
|
|
} else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
|
|
MinimumSwathHeightY = MaximumSwathHeightY[k];
|
|
MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
|
|
} else {
|
|
MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
|
|
MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
|
|
}
|
|
}
|
|
|
|
RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
|
|
RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
|
|
if (SourcePixelFormat[k] == dm_420_10) {
|
|
RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
|
|
RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
|
|
}
|
|
RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
|
|
RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
|
|
if (SourcePixelFormat[k] == dm_420_10) {
|
|
RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
|
|
RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
|
|
}
|
|
|
|
if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
|
|
SwathHeightY[k] = MaximumSwathHeightY[k];
|
|
SwathHeightC[k] = MaximumSwathHeightC[k];
|
|
RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
|
|
RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
|
|
} else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
|
|
&& RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
|
|
SwathHeightY[k] = MinimumSwathHeightY;
|
|
SwathHeightC[k] = MaximumSwathHeightC[k];
|
|
RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
|
|
RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
|
|
} else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
|
|
&& RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
|
|
SwathHeightY[k] = MaximumSwathHeightY[k];
|
|
SwathHeightC[k] = MinimumSwathHeightC;
|
|
RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
|
|
RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
|
|
} else {
|
|
SwathHeightY[k] = MinimumSwathHeightY;
|
|
SwathHeightC[k] = MinimumSwathHeightC;
|
|
RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
|
|
RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
|
|
}
|
|
{
|
|
double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
|
|
if (SwathHeightC[k] == 0) {
|
|
DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
|
|
DETBufferSizeC[k] = 0;
|
|
} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
|
|
DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
|
|
DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
|
|
} else {
|
|
DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
|
|
DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
|
|
}
|
|
|
|
if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
|
|
|| (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
|
|
*ViewportSizeSupport = false;
|
|
ViewportSizeSupportPerPlane[k] = false;
|
|
} else {
|
|
ViewportSizeSupportPerPlane[k] = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void CalculateSwathWidth(
|
|
bool ForceSingleDPP,
|
|
int NumberOfActivePlanes,
|
|
enum source_format_class SourcePixelFormat[],
|
|
enum scan_direction_class SourceScan[],
|
|
int ViewportWidth[],
|
|
int ViewportHeight[],
|
|
int SurfaceWidthY[],
|
|
int SurfaceWidthC[],
|
|
int SurfaceHeightY[],
|
|
int SurfaceHeightC[],
|
|
enum odm_combine_mode ODMCombineEnabled[],
|
|
int BytePerPixY[],
|
|
int BytePerPixC[],
|
|
int Read256BytesBlockHeightY[],
|
|
int Read256BytesBlockHeightC[],
|
|
int Read256BytesBlockWidthY[],
|
|
int Read256BytesBlockWidthC[],
|
|
int BlendingAndTiming[],
|
|
int HActive[],
|
|
double HRatio[],
|
|
int DPPPerPlane[],
|
|
double SwathWidthSingleDPPY[],
|
|
double SwathWidthSingleDPPC[],
|
|
double SwathWidthY[],
|
|
double SwathWidthC[],
|
|
int MaximumSwathHeightY[],
|
|
int MaximumSwathHeightC[],
|
|
int swath_width_luma_ub[],
|
|
int swath_width_chroma_ub[])
|
|
{
|
|
enum odm_combine_mode MainPlaneODMCombine;
|
|
int j, k;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
|
|
#endif
|
|
|
|
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
|
if (SourceScan[k] != dm_vert) {
|
|
SwathWidthSingleDPPY[k] = ViewportWidth[k];
|
|
} else {
|
|
SwathWidthSingleDPPY[k] = ViewportHeight[k];
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
|
|
dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
|
|
#endif
|
|
|
|
MainPlaneODMCombine = ODMCombineEnabled[k];
|
|
for (j = 0; j < NumberOfActivePlanes; ++j) {
|
|
if (BlendingAndTiming[k] == j) {
|
|
MainPlaneODMCombine = ODMCombineEnabled[j];
|
|
}
|
|
}
|
|
|
|
if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
|
|
SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
|
|
} else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
|
|
SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
|
|
} else if (DPPPerPlane[k] == 2) {
|
|
SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
|
|
} else {
|
|
SwathWidthY[k] = SwathWidthSingleDPPY[k];
|
|
}
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
|
|
dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
|
|
#endif
|
|
|
|
if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
|
|
SwathWidthC[k] = SwathWidthY[k] / 2;
|
|
SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
|
|
} else {
|
|
SwathWidthC[k] = SwathWidthY[k];
|
|
SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
|
|
}
|
|
|
|
if (ForceSingleDPP == true) {
|
|
SwathWidthY[k] = SwathWidthSingleDPPY[k];
|
|
SwathWidthC[k] = SwathWidthSingleDPPC[k];
|
|
}
|
|
{
|
|
int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
|
|
int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
|
|
#endif
|
|
|
|
if (SourceScan[k] != dm_vert) {
|
|
MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
|
|
MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
|
|
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
|
|
if (BytePerPixC[k] > 0) {
|
|
int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
|
|
|
swath_width_chroma_ub[k] = dml_min(
|
|
surface_width_ub_c,
|
|
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
|
|
} else {
|
|
swath_width_chroma_ub[k] = 0;
|
|
}
|
|
} else {
|
|
MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
|
|
MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
|
|
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
|
|
if (BytePerPixC[k] > 0) {
|
|
int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
|
|
|
swath_width_chroma_ub[k] = dml_min(
|
|
surface_height_ub_c,
|
|
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
|
|
} else {
|
|
swath_width_chroma_ub[k] = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static double CalculateExtraLatency(
|
|
int RoundTripPingLatencyCycles,
|
|
int ReorderingBytes,
|
|
double DCFCLK,
|
|
int TotalNumberOfActiveDPP,
|
|
int PixelChunkSizeInKByte,
|
|
int TotalNumberOfDCCActiveDPP,
|
|
int MetaChunkSize,
|
|
double ReturnBW,
|
|
bool GPUVMEnable,
|
|
bool HostVMEnable,
|
|
int NumberOfActivePlanes,
|
|
int NumberOfDPP[],
|
|
int dpte_group_bytes[],
|
|
double HostVMInefficiencyFactor,
|
|
double HostVMMinPageSize,
|
|
int HostVMMaxNonCachedPageTableLevels)
|
|
{
|
|
double ExtraLatencyBytes;
|
|
double ExtraLatency;
|
|
|
|
ExtraLatencyBytes = CalculateExtraLatencyBytes(
|
|
ReorderingBytes,
|
|
TotalNumberOfActiveDPP,
|
|
PixelChunkSizeInKByte,
|
|
TotalNumberOfDCCActiveDPP,
|
|
MetaChunkSize,
|
|
GPUVMEnable,
|
|
HostVMEnable,
|
|
NumberOfActivePlanes,
|
|
NumberOfDPP,
|
|
dpte_group_bytes,
|
|
HostVMInefficiencyFactor,
|
|
HostVMMinPageSize,
|
|
HostVMMaxNonCachedPageTableLevels);
|
|
|
|
ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
|
|
dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
|
|
dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
|
|
dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
|
|
dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
|
|
#endif
|
|
|
|
return ExtraLatency;
|
|
}
|
|
|
|
/*
 * Return the number of bytes counted towards extra latency:
 *
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable is set, for every plane k:
 *       NumberOfDPP[k] * dpte_group_bytes[k]
 *       * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set.
 * In that case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (never below 0) for HostVMMinPageSize below 2048, below 1048576, or
 * larger, respectively.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int DynamicLevels = 0;
	int FetchesPerGroup;
	double Total;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	Total = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no per-plane dpte contribution. */
	if (!GPUVMEnable)
		return Total;

	FetchesPerGroup = 1 + 8 * DynamicLevels;
	for (plane = 0; plane < NumberOfActivePlanes; plane++)
		Total += NumberOfDPP[plane] * dpte_group_bytes[plane] * FetchesPerGroup * HostVMInefficiencyFactor;

	return Total;
}
|
|
|
|
/*
 * Return the largest of the three supplied urgent latencies.  When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 * UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double Worst = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return Worst;

	return Worst + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
|
|
|
|
static void UseMinimumDCFCLK(
|
|
struct display_mode_lib *mode_lib,
|
|
int MaxPrefetchMode,
|
|
int ReorderingBytes)
|
|
{
|
|
struct vba_vars_st *v = &mode_lib->vba;
|
|
int dummy1, i, j, k;
|
|
double NormalEfficiency, dummy2, dummy3;
|
|
double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
|
|
|
|
NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
|
|
for (i = 0; i < v->soc.num_states; ++i) {
|
|
for (j = 0; j <= 1; ++j) {
|
|
double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
|
|
double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
|
|
double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
|
|
double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
|
|
double MinimumTWait;
|
|
double NonDPTEBandwidth;
|
|
double DPTEBandwidth;
|
|
double DCFCLKRequiredForAverageBandwidth;
|
|
double ExtraLatencyBytes;
|
|
double ExtraLatencyCycles;
|
|
double DCFCLKRequiredForPeakBandwidth;
|
|
int NoOfDPPState[DC__NUM_DPP__MAX];
|
|
double MinimumTvmPlus2Tr0;
|
|
|
|
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
|
|
+ v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
|
|
}
|
|
|
|
for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
|
|
NoOfDPPState[k] = v->NoOfDPP[i][j][k];
|
|
}
|
|
|
|
MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
|
|
NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
|
|
DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
|
|
TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
|
|
DCFCLKRequiredForAverageBandwidth = dml_max3(
|
|
v->ProjectedDCFCLKDeepSleep[i][j],
|
|
(NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
|
|
/ (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
|
|
(NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
|
|
|
|
ExtraLatencyBytes = CalculateExtraLatencyBytes(
|
|
ReorderingBytes,
|
|
v->TotalNumberOfActiveDPP[i][j],
|
|
v->PixelChunkSizeInKByte,
|
|
v->TotalNumberOfDCCActiveDPP[i][j],
|
|
v->MetaChunkSize,
|
|
v->GPUVMEnable,
|
|
v->HostVMEnable,
|
|
v->NumberOfActivePlanes,
|
|
NoOfDPPState,
|
|
v->dpte_group_bytes,
|
|
1,
|
|
v->HostVMMinPageSize,
|
|
v->HostVMMaxNonCachedPageTableLevels);
|
|
ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
double DCFCLKCyclesRequiredInPrefetch;
|
|
double ExpectedPrefetchBWAcceleration;
|
|
double PrefetchTime;
|
|
|
|
PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
|
|
+ v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
|
|
DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
|
|
+ v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
|
|
+ 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
|
|
+ 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
|
|
PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
|
|
ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
|
|
/ (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
|
|
DynamicMetadataVMExtraLatency[k] =
|
|
(v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
|
|
v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
|
|
PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
|
|
- v->UrgLatency[i]
|
|
* ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
|
|
* (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
|
|
- DynamicMetadataVMExtraLatency[k];
|
|
|
|
if (PrefetchTime > 0) {
|
|
double ExpectedVRatioPrefetch;
|
|
ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
|
|
/ (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
|
|
DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
|
|
* dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
|
|
if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
|
|
DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
|
|
+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
|
|
}
|
|
} else {
|
|
DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
|
|
}
|
|
if (v->DynamicMetadataEnable[k] == true) {
|
|
double TSetupPipe;
|
|
double TdmbfPipe;
|
|
double TdmsksPipe;
|
|
double TdmecPipe;
|
|
double AllowedTimeForUrgentExtraLatency;
|
|
|
|
CalculateVupdateAndDynamicMetadataParameters(
|
|
v->MaxInterDCNTileRepeaters,
|
|
v->RequiredDPPCLK[i][j][k],
|
|
v->RequiredDISPCLK[i][j],
|
|
v->ProjectedDCFCLKDeepSleep[i][j],
|
|
v->PixelClock[k],
|
|
v->HTotal[k],
|
|
v->VTotal[k] - v->VActive[k],
|
|
v->DynamicMetadataTransmittedBytes[k],
|
|
v->DynamicMetadataLinesBeforeActiveRequired[k],
|
|
v->Interlace[k],
|
|
v->ProgressiveToInterlaceUnitInOPP,
|
|
&TSetupPipe,
|
|
&TdmbfPipe,
|
|
&TdmecPipe,
|
|
&TdmsksPipe,
|
|
&dummy1,
|
|
&dummy2,
|
|
&dummy3);
|
|
AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
|
|
- TdmsksPipe - DynamicMetadataVMExtraLatency[k];
|
|
if (AllowedTimeForUrgentExtraLatency > 0) {
|
|
DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
|
|
DCFCLKRequiredForPeakBandwidthPerPlane[k],
|
|
ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
|
|
} else {
|
|
DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
|
|
}
|
|
}
|
|
}
|
|
DCFCLKRequiredForPeakBandwidth = 0;
|
|
for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
|
|
DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
|
|
}
|
|
MinimumTvmPlus2Tr0 = v->UrgLatency[i]
|
|
* (v->GPUVMEnable == true ?
|
|
(v->HostVMEnable == true ?
|
|
(v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
|
|
0);
|
|
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
|
|
double MaximumTvmPlus2Tr0PlusTsw;
|
|
MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
|
|
if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
|
|
DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
|
|
} else {
|
|
DCFCLKRequiredForPeakBandwidth = dml_max3(
|
|
DCFCLKRequiredForPeakBandwidth,
|
|
2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
|
|
(2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
|
|
}
|
|
}
|
|
v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
|
|
}
|
|
}
|
|
}
|
|
|
|
static void CalculateUnboundedRequestAndCompressedBufferSize(
|
|
unsigned int DETBufferSizeInKByte,
|
|
int ConfigReturnBufferSizeInKByte,
|
|
enum unbounded_requesting_policy UseUnboundedRequestingFinal,
|
|
int TotalActiveDPP,
|
|
bool NoChromaPlanes,
|
|
int MaxNumDPP,
|
|
int CompressedBufferSegmentSizeInkByteFinal,
|
|
enum output_encoder_class *Output,
|
|
bool *UnboundedRequestEnabled,
|
|
int *CompressedBufferSizeInkByte)
|
|
{
|
|
double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
|
|
|
|
*UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
|
|
*CompressedBufferSizeInkByte = (
|
|
*UnboundedRequestEnabled == true ?
|
|
ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
|
|
ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
|
|
*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
|
|
|
|
#ifdef __DML_VBA_DEBUG__
|
|
dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
|
|
dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
|
|
dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
|
|
dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
|
|
dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
|
|
dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
|
|
dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
|
|
#endif
|
|
}
|
|
|
|
static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
|
|
{
|
|
bool ret_val = false;
|
|
|
|
ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
|
|
if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
|
|
ret_val = false;
|
|
}
|
|
return (ret_val);
|
|
}
|
|
|