// C++ source excerpt
/// Selects the luma intra prediction configuration for `cu` by rate-distortion search.
///
/// Steps performed by this function:
///  1. A candidate list (RdModeList) is obtained from xEstimateLumaRdModeList().
///  2. With m_usePbIntraFast on a non-intra slice, the list may be shortened (or the
///     whole search abandoned) by comparing CandHadList entries against cs.interHad.
///  3. Every remaining candidate, followed by up to two BDPCM candidates, is coded with
///     xIntraCodingLumaQT() for each ISP setting that is still enabled; the cheapest
///     result is kept in csBest.
///  4. The winning result is copied back into `cs` and the intra fields of `cu`.
///
/// @param cu           Coding unit under test. Its ispMode, mipFlag, mipTransposedFlag,
///                     multiRefIdx, intraDir[CH_L] and bdpcmM[CH_L] are overwritten while
///                     candidates are tried and hold the best values on a true return.
/// @param partitioner  Supplies the current area (width/height) and the channel type.
/// @param bestCost     Best cost known to the caller. Passed by value: it is lowered
///                     locally whenever a cheaper candidate is found and forwarded to
///                     xSpeedUpIntra() and xIntraCodingLumaQT().
/// @return true when a candidate was accepted and committed to `cs`/`cu`;
///         false when the m_usePbIntraFast check terminates the search early (in that
///         case cs.dist is set to MAX_DISTORTION and cs.interHad to 0).
///         If the search loop finishes without accepting any candidate, THROW is invoked.
bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, double bestCost)
{
CodingStructure &cs = *cu.cs;
const int width = partitioner.currArea().lwidth();
const int height = partitioner.currArea().lheight();
// snapshot of the CABAC estimator contexts; it is written back after every candidate test below
const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() );
// value returned by xFindInterCUCost(); only used by the m_fastLocalDualTreeMode early exit inside the mode loop
double costInterCU = xFindInterCUCost( cu );
// becomes true as soon as one candidate beats csBest->cost
bool validReturn = false;
//===== determine set of modes to be tested (using prediction signal only) =====
int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> RdModeList;
static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> HadModeList;
static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;
// default candidate count comes from a table indexed by log2(width) and log2(height), both offset by MIN_CU_LOG2
int numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[Log2(width) - MIN_CU_LOG2][Log2(height) - MIN_CU_LOG2];
// a positive m_numIntraModesFullRD in the encoder configuration overrides the table value
if (m_pcEncCfg->m_numIntraModesFullRD > 0)
numModesForFullRD=m_pcEncCfg->m_numIntraModesFullRD;
#if INTRA_FULL_SEARCH
// compile-time switch: test every available luma mode
numModesForFullRD = numModesAvailable;
#endif
const SPS& sps = *cu.cs->sps;
// MIP requires: enabled in the SPS, CU not larger than the maximum TB size in either dimension,
// and either lfnstIdx == 0 or allowLfnstWithMip() accepting the luma size
const bool mipAllowed = sps.MIP && cu.lwidth() <= sps.getMaxTbSize() && cu.lheight() <= sps.getMaxTbSize() && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.lumaSize()));
// maximum allowed ratio between the two CU dimensions for testing MIP: 8, halved for each m_useFastMIP step above 1
const int SizeThr = 8 >> std::max( 0, m_pcEncCfg->m_useFastMIP - 1 );
// MIP is tested only if allowed, the aspect ratio is within SizeThr and the CU fits into MIP_MAX_WIDTH x MIP_MAX_HEIGHT
const bool testMip = mipAllowed && ( cu.lwidth() <= ( SizeThr * cu.lheight() ) && cu.lheight() <= ( SizeThr * cu.lwidth() ) ) && ( cu.lwidth() <= MIP_MAX_WIDTH && cu.lheight() <= MIP_MAX_HEIGHT );
bool testISP = sps.ISP && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize());
if (testISP)
{
// number of sub-partitions per split direction = dimension / split size returned by CU::getISPSplitDim()
int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
m_ispTestedModes[0].init(numTotalPartsHor, numTotalPartsVer, 0);
// the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with
// ISP due to size restrictions
numTotalPartsHor = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0;
numTotalPartsVer = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0;
// entries 1..NUM_LFNST_NUM_PER_SET-1 get the LFNST-restricted partition counts
for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++)
{
m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer, 0);
}
// ISP stays enabled only if entry 0 reports a non-zero numTotalParts[0] after init()
testISP = m_ispTestedModes[0].numTotalParts[0];
}
else
{
m_ispTestedModes[0].init(0, 0, 0);
}
// builds the candidate lists; numModesForFullRD must equal RdModeList.size() afterwards (checked below)
// NOTE(review): presumably numModesForFullRD is taken by reference and may be adjusted by the callee - confirm
xEstimateLumaRdModeList(numModesForFullRD, RdModeList, HadModeList, CandCostList, CandHadList, cu, testMip);
CHECK( (size_t)numModesForFullRD != RdModeList.size(), "Inconsistent state!" );
// after this point, don't use numModesForFullRD
// Fast pruning against the inter result: only with m_usePbIntraFast, on non-intra slices, and when not all modes are in the list
if( m_pcEncCfg->m_usePbIntraFast && !cs.slice->isIntra() && RdModeList.size() < numModesAvailable )
{
// scale factor applied to cs.interHad: 1.2 for m_usePbIntraFast == 1 on areas of at least 16x16, PBINTRA_RATIO otherwise
double pbintraRatio = m_pcEncCfg->m_usePbIntraFast == 1 && ( cs.area.lwidth() >= 16 && cs.area.lheight() >= 16 ) ? 1.2 : PBINTRA_RATIO;
int maxSize = -1;
ModeInfo bestMipMode;
int bestMipIdx = -1;
// remember the first MIP candidate in the list so it can be re-added if the truncation below removes it
for( int idx = 0; idx < RdModeList.size(); idx++ )
{
if( RdModeList[idx].mipFlg )
{
bestMipMode = RdModeList[idx];
bestMipIdx = idx;
break;
}
}
const int numHadCand = 3;
// k runs downwards, so maxSize ends up as the smallest k in [0,2] for which CandHadList has no entry k
// or entry k exceeds cs.interHad * pbintraRatio; it stays -1 if no k qualifies
for (int k = numHadCand - 1; k >= 0; k--)
{
if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio) { maxSize = k; }
}
if (maxSize > 0)
{
// keep at most maxSize candidates
RdModeList.resize(std::min<size_t>(RdModeList.size(), maxSize));
if( bestMipIdx >= 0 )
{
// the remembered MIP candidate was cut off: append it again and swap the matching entries in m_SortedPelUnitBufs
if( RdModeList.size() <= bestMipIdx )
{
RdModeList.push_back(bestMipMode);
m_SortedPelUnitBufs->swap( maxSize, bestMipIdx );
}
}
}
// already the first candidate fails the test: give up on intra for this CU
if (maxSize == 0)
{
cs.dist = MAX_DISTORTION;
cs.interHad = 0;
return false;
}
}
//===== check modes (using r-d costs) =====
ModeInfo bestPUMode;
// two scratch coding structures; they are swapped whenever csTemp holds a cheaper result than csBest
CodingStructure *csTemp = m_pTempCS;
CodingStructure *csBest = m_pBestCS;
csTemp->slice = csBest->slice = cs.slice;
csTemp->picture = csBest->picture = cs.picture;
csTemp->compactResize( cu );
csBest->compactResize( cu );
csTemp->initStructData();
csBest->initStructData();
int bestLfnstIdx = 0;
const bool useBDPCM = cs.picture->useBDPCM;
// two extra BDPCM candidates are appended to the mode loop when BDPCM is usable for this CU, none otherwise
int NumBDPCMCand = (useBDPCM && sps.BDPCM && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType))) ? 2 : 0;
int bestbdpcmMode = 0;
int bestISP = 0;
int bestMrl = 0;
bool bestMip = 0;
int EndMode = (int)RdModeList.size();
bool useISPlfnst = testISP && sps.LFNST;
// sticky "no LFNST" flag: set by the fast-intra decision below or once a large transform-skip block becomes the best result
bool noLFNST_ts = false;
// best cost seen so far without ISP ([0]) and with ISP ([1]); only maintained while useISPlfnst is set
double bestCostIsp[2] = { MAX_DOUBLE, MAX_DOUBLE };
bool disableMTS = false;
bool disableLFNST = false;
bool disableDCT2test = false;
if (m_pcEncCfg->m_FastIntraTools)
{
// speedIntra is read as a bit field after the call: bit 0 = skip ISP, bit 1 = skip LFNST, bit 2 = skip MTS,
// any bit from 3 upwards = disableDCT2test
// NOTE(review): speedIntra (and possibly EndMode) are presumably output arguments of xSpeedUpIntra() - confirm
int speedIntra = 0;
xSpeedUpIntra(bestCost, EndMode, speedIntra, cu);
disableMTS = (speedIntra >> 2 ) & 0x1;
disableLFNST = (speedIntra >> 1) & 0x1;
disableDCT2test = speedIntra>>3;
if (disableLFNST)
{
noLFNST_ts = true;
useISPlfnst = false;
}
if (speedIntra & 0x1)
{
testISP = false;
}
}
// iterations [0, EndMode) test RdModeList entries, the following NumBDPCMCand iterations test BDPCM
for (int mode_cur = 0; mode_cur < EndMode + NumBDPCMCand; mode_cur++)
{
int mode = mode_cur;
if (mode_cur >= EndMode)
{
// BDPCM candidates are encoded as negative modes: -2 when mode_cur == EndMode, -1 otherwise;
// ISP is switched off from here on
mode = mode_cur - EndMode ? -1 : -2;
testISP = false;
}
// set CU/PU to luma prediction mode
ModeInfo testMode;
// ISP mode to be skipped in the inner loop (0 = skip none)
int noISP = 0;
// inner loop covers ispM = 0 (no ISP) and, when ISP is tested, ispM = 1 and 2
int endISP = testISP ? 2 : 0;
bool noLFNST = false || noLFNST_ts;
// applies to every candidate except index 0 (negative BDPCM modes included)
if (mode && useISPlfnst)
{
// request "no LFNST" when the best non-ISP cost is more than 1.4 times the best ISP cost
noLFNST |= (bestCostIsp[0] > (bestCostIsp[1] * 1.4));
// ISP is only tried for list indices 0..2; testISP stays false for all later iterations
if (mode > 2)
{
endISP = 0;
testISP = false;
}
}
if (testISP)
{
// first argument 1: per-candidate call made before the ISP loop; testISP, noISP and endISP are passed to it
// NOTE(review): presumably the helper may update testISP/noISP/endISP by reference - confirm in xSpeedUpISP()
xSpeedUpISP(1, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, bestLfnstIdx);
}
int startISP = 0;
// with disableDCT2test, once an ISP mode is the current best, later candidates skip ispM == 0
// (provided ISP modes are still in range)
if (disableDCT2test && mode && bestISP)
{
startISP = endISP ? 1 : 0;
}
for (int ispM = startISP; ispM <= endISP; ispM++)
{
if (ispM && (ispM == noISP))
{
continue;
}
if (mode < 0)
{
// BDPCM: bdpcmM is 2 for mode -2 and 1 for mode -1; 2 selects VER_IDX, 1 selects HOR_IDX
cu.bdpcmM[CH_L] = -mode;
testMode = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmM[CH_L] == 2 ? VER_IDX : HOR_IDX);
}
else
{
testMode = RdModeList[mode];
cu.bdpcmM[CH_L] = 0;
}
// copy the candidate description into the CU
cu.ispMode = ispM;
cu.mipFlag = testMode.mipFlg;
cu.mipTransposedFlag = testMode.mipTrFlg;
cu.multiRefIdx = testMode.mRefId;
cu.intraDir[CH_L] = testMode.modeId;
// first argument 0: per-ISP-mode call; a true return skips this ISP mode
if (cu.ispMode && xSpeedUpISP(0, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, 0) )
{
continue;
}
if (m_pcEncCfg->m_FastIntraTools && (cu.ispMode || sps.LFNST || sps.MTS))
{
m_ispTestedModes[0].intraWasTested = true;
}
// sanity checks on tool combinations
CHECK(cu.mipFlag && cu.multiRefIdx, "Error: combination of MIP and MRL not supported");
CHECK(cu.multiRefIdx && (cu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
CHECK(cu.ispMode && cu.multiRefIdx, "Error: combination of ISP and MRL not supported");
// determine residual for partition
cs.initSubStructure(*csTemp, partitioner.chType, cs.area, true);
// the candidate index is handed over negated when (a) no ISP and LFNST is to be skipped, (b) an ISP mode is
// tested for a candidate other than index 0 while the best LFNST index is 0, or (c) LFNST is disabled
// NOTE(review): how xIntraCodingLumaQT() interprets the sign is not visible here. For mode == 0 the negation
// has no effect, and for BDPCM candidates (mode < 0) it yields a positive value - confirm this is intended
int doISP = (((cu.ispMode == 0) && noLFNST) || (useISPlfnst && mode && cu.ispMode && (bestLfnstIdx == 0)) || disableLFNST) ? -mode : mode;
xIntraCodingLumaQT(*csTemp, partitioner, m_SortedPelUnitBufs->getBufFromSortedList(mode), bestCost, doISP, disableMTS);
DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
cu.blocks[0].y, width, height, csTemp->cost, testMode.modeId, testMode.ispMod,
cu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);
// an ISP result whose first TU has no luma cbf is discarded by giving it the maximum cost
if (cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMP_Y])
{
csTemp->cost = MAX_DOUBLE;
csTemp->costDbOffset = 0;
}
if (useISPlfnst)
{
// track the minimum cost separately for non-ISP (n = 0) and ISP (n = 1) tests
int n = (cu.ispMode == 0) ? 0 : 1;
bestCostIsp[n] = csTemp->cost < bestCostIsp[n] ? csTemp->cost : bestCostIsp[n];
}
// check r-d cost
if (csTemp->cost < csBest->cost)
{
validReturn = true;
// after the swap csBest points at the structure that was just coded
std::swap(csTemp, csBest);
bestPUMode = testMode;
bestLfnstIdx = csBest->cus[0]->lfnstIdx;
bestISP = csBest->cus[0]->ispMode;
bestMip = csBest->cus[0]->mipFlag;
bestMrl = csBest->cus[0]->multiRefIdx;
bestbdpcmMode = cu.bdpcmM[CH_L];
m_ispTestedModes[bestLfnstIdx].bestSplitSoFar = ISPType(bestISP);
if (csBest->cost < bestCost)
{
bestCost = csBest->cost;
}
// new best uses MTS_SKIP on a luma block with floorLog2(area) >= 6: skip LFNST for all remaining tests
if ((csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] == MTS_SKIP) && ( floorLog2(csBest->getTU(partitioner.chType)->blocks[COMP_Y].area()) >= 6 ))
{
noLFNST_ts = 1;
}
}
// reset context models
m_CABACEstimator->getCtx() = ctxStart;
csTemp->releaseIntermediateData();
// fast local dual-tree exit: only for non-BDPCM candidates (mode >= 0) on non-intra slices, when a best result
// exists and costInterCU is known
if (m_pcEncCfg->m_fastLocalDualTreeMode && CU::isConsIntra(cu) && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0)
{
if( (m_pcEncCfg->m_fastLocalDualTreeMode == 2) || (csBest->cost > costInterCU * 1.5))
{
//Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%)
// NOTE(review): EndMode = 0 ends the list candidates, but the outer loop still runs while
// mode_cur < NumBDPCMCand, so a BDPCM candidate may still be tested - confirm this is intended
EndMode = 0;
break;
}
}
}
} // Mode loop
if (m_pcEncCfg->m_FastIntraTools && (sps.ISP|| sps.LFNST || sps.MTS))
{
// summary of the winning tools as a bit mask: 4 = non-zero luma mtsIdx, 2 = LFNST used, 1 = ISP used
int bestMode = csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] ? 4 : 0;
bestMode |= bestLfnstIdx ? 2 : 0;
bestMode |= bestISP ? 1 : 0;
m_ispTestedModes[0].bestIntraMode = bestMode;
}
cu.ispMode = bestISP;
if( validReturn )
{
// commit the best sub-structure to the parent coding structure
cs.useSubStructure( *csBest, partitioner.chType, TREE_D, cu.singleChan( CH_L ), true );
const ReshapeData& reshapeData = cs.picture->reshapeData;
// with LMCS enabled for the picture and the CTU flag set, the reshaped reconstruction buffer is copied as well
if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
{
cs.getRspRecoBuf().copyFrom(csBest->getRspRecoBuf());
}
//=== update PU data ====
cu.lfnstIdx = bestLfnstIdx;
cu.mipTransposedFlag = bestPUMode.mipTrFlg;
cu.intraDir[CH_L] = bestPUMode.modeId;
cu.bdpcmM[CH_L] = bestbdpcmMode;
cu.mipFlag = bestMip;
cu.multiRefIdx = bestMrl;
}
else
{
// no candidate was accepted; the function has no fallback for this case
THROW("fix this");
}
csBest->releaseIntermediateData();
return validReturn;
}