/*
* Copyright (c) 2020-2021, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file     decode_filmgrain_gennoise_rp1_packet_g12.cpp
//! \brief    Film grain regress phase 1 kernel render packet used by the media pipeline.
//! \details  The film grain regress phase 1 render packet provides the structures and generates the command buffer that the media pipeline uses.
//!

#include "decode_filmgrain_gennoise_rp1_packet_g12.h"
#include "decode_av1_filmgrain_feature_g12.h"
#include "decode_av1_feature_defs_g12.h"
#include "mos_defs.h"
#include "hal_oca_interface.h"

namespace decode
{

//!
//! \brief    Construct the packet and cache the interfaces it works with.
//! \details  Nothing is validated here; Init() null-checks the cached
//!           interfaces before the packet is used.
//! \param    [in] pipeline
//!           Pipeline that supplies the status report instance and the feature
//!           manager; it is also down-cast to Av1Pipeline. May be nullptr, in
//!           which case none of the three members is assigned.
//! \param    [in] task
//!           Task forwarded to the CmdPacket and RenderCmdPacket bases.
//! \param    [in] hwInterface
//!           CodecHal HW interface used to obtain the MI, OS, VDEnc and
//!           RenderHal interfaces. May be nullptr, in which case the
//!           RenderCmdPacket base receives null interfaces and the members
//!           assigned in the body are left untouched.
//!
FilmGrainRp1Packet::FilmGrainRp1Packet(MediaPipeline *pipeline, MediaTask *task, CodechalHwInterface *hwInterface):
    CmdPacket(task),
    // The base-class initializer runs before the body's null check, so the
    // hwInterface dereference has to be guarded here as well.
    RenderCmdPacket(
        task,
        hwInterface != nullptr ? hwInterface->GetOsInterface() : nullptr,
        hwInterface != nullptr ? hwInterface->GetRenderHalInterface() : nullptr)
{
    if (pipeline != nullptr)
    {
        m_statusReport   = pipeline->GetStatusReportInstance();
        m_featureManager = pipeline->GetFeatureManager();
        // Yields nullptr when the pipeline is not an Av1Pipeline; Init() rejects that case.
        m_av1Pipeline    = dynamic_cast<Av1Pipeline *>(pipeline);
    }

    if (hwInterface != nullptr)
    {
        m_hwInterface    = hwInterface;
        m_miInterface    = hwInterface->GetMiInterface();
        m_osInterface    = hwInterface->GetOsInterface();
        m_vdencInterface = hwInterface->GetVdencInterface();
        m_renderHal      = hwInterface->GetRenderHalInterface();
    }
}

//!
//! \brief    Initialize the packet.
//! \details  Checks the interfaces cached by the constructor, initializes the
//!           RenderCmdPacket base, resolves the AV1 basic feature, the film
//!           grain feature and the decode allocator, then calls Initilize().
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when the end of the function is reached.
//!           NOTE(review): the DECODE_CHK_* macros are expected to return
//!           early with a failure status - confirm against their definition.
//!
MOS_STATUS FilmGrainRp1Packet::Init()
{
    DECODE_FUNC_CALL();

    // Members populated by the constructor.
    DECODE_CHK_NULL(m_miInterface);
    DECODE_CHK_NULL(m_statusReport);
    DECODE_CHK_NULL(m_featureManager);
    DECODE_CHK_NULL(m_av1Pipeline);
    DECODE_CHK_NULL(m_osInterface);
    DECODE_CHK_NULL(m_vdencInterface);

    DECODE_CHK_STATUS(RenderCmdPacket::Init());

    // AV1 basic feature: the member is assigned first and validated afterwards.
    auto *basicFeature = m_featureManager->GetFeature(FeatureIDs::basicFeature);
    m_av1BasicFeature  = dynamic_cast<Av1BasicFeature *>(basicFeature);
    DECODE_CHK_NULL(m_av1BasicFeature);

    // Film grain feature that owns the kernel states and surfaces used by this packet.
    auto *filmGrainFeature = m_featureManager->GetFeature(Av1FeatureIDs::av1SwFilmGrain);
    m_filmGrainFeature     = dynamic_cast<Av1DecodeFilmGrainG12 *>(filmGrainFeature);
    DECODE_CHK_NULL(m_filmGrainFeature);

    m_allocator = m_av1Pipeline->GetDecodeAllocator();
    DECODE_CHK_NULL(m_allocator);

    DECODE_CHK_STATUS(Initilize());

    return MOS_STATUS_SUCCESS;
}


//!
//! \brief    Prepare the render state for the regress phase 1 kernel.
//! \details  Latches the picture parameters from the AV1 basic feature, resets
//!           the binding table entries, runs the render engine / kernel state /
//!           surface state / curbe / kernel load steps in that order, and
//!           finally fills in the walker parameters for the active walker type.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success; MOS_STATUS_UNKNOWN when the walker
//!           type is neither media nor compute.
//!           NOTE(review): the DECODE_CHK_* macros are expected to return
//!           early with a failure status - confirm against their definition.
//!
MOS_STATUS FilmGrainRp1Packet::Prepare()
{
    DECODE_FUNC_CALL();

    DECODE_CHK_NULL(m_hwInterface);
    // Dereferenced right below and again in the compute-walker branch.
    DECODE_CHK_NULL(m_av1BasicFeature);

    m_picParams = m_av1BasicFeature->m_av1PicParams;

    ResetBindingTableEntry();

    DECODE_CHK_STATUS(RenderEngineSetup());
    DECODE_CHK_STATUS(KernelStateSetup());
    DECODE_CHK_STATUS(SetUpSurfaceState());
    DECODE_CHK_STATUS(SetCurbeRegressPhase1());
    DECODE_CHK_STATUS(LoadKernel());

    if (m_walkerType == WALKER_TYPE_MEDIA)
    {
        DECODE_CHK_STATUS(SetupMediaWalker());
    }
    else if (m_walkerType == WALKER_TYPE_COMPUTE)
    {
        // The walker rectangle is taken from the film grain output surface;
        // validate the pointer chain before reading its dimensions.
        DECODE_CHK_NULL(m_av1BasicFeature->m_filmGrainProcParams);
        DECODE_CHK_NULL(m_av1BasicFeature->m_filmGrainProcParams->m_outputSurface);

        m_renderData.walkerParam.alignedRect.left   = 0;
        m_renderData.walkerParam.alignedRect.top    = 0;
        m_renderData.walkerParam.alignedRect.right  = m_av1BasicFeature->m_filmGrainProcParams->m_outputSurface->dwWidth;
        m_renderData.walkerParam.alignedRect.bottom = m_av1BasicFeature->m_filmGrainProcParams->m_outputSurface->dwHeight;
        m_renderData.walkerParam.iCurbeLength       = m_renderData.iCurbeLength;
        m_renderData.walkerParam.iCurbeOffset       = m_renderData.iCurbeOffset;
        m_renderData.walkerParam.iBindingTable      = m_renderData.bindingTable;
        m_renderData.walkerParam.iMediaID           = m_renderData.mediaID;
        m_renderData.walkerParam.iBlocksX           = m_renderData.KernelParam.blocks_x;
        m_renderData.walkerParam.iBlocksY           = m_renderData.KernelParam.blocks_y;
        DECODE_CHK_STATUS(PrepareComputeWalkerParams(m_renderData.walkerParam, m_gpgpuWalkerParams));
    }
    else
    {
        DECODE_ASSERTMESSAGE("Walker is disabled!");
        return MOS_STATUS_UNKNOWN;
    }

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Record the regress phase 1 kernel launch into a command buffer.
//! \details  Emits, in order: power mode setup, command buffer prolog, OCA
//!           start/trace hooks, optional perf-collect start, timing data,
//!           cache override, media states, optional RCS status tag, optional
//!           perf-collect end, timing data, a pipe control, the WA-dependent
//!           dummy VFE / media state flush commands, the OCA end hook and the
//!           batch buffer end. Afterwards the command buffer is returned to the
//!           OS interface and, unless null rendering is requested, the current
//!           media state (and batch buffer, if any) is marked busy.
//! \param    [in,out] commandBuffer
//!           Command buffer that receives the commands. Must not be nullptr.
//! \param    [in] packetPhase
//!           Not used by this packet.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when the end of the function is reached.
//!           NOTE(review): the RENDER_PACKET_CHK_* macros are expected to
//!           return early with a failure status - confirm against their
//!           definition.
//!
MOS_STATUS FilmGrainRp1Packet::Submit(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase)
{
    // commandBuffer is dereferenced for the OCA hooks below, so reject nullptr up front.
    RENDER_PACKET_CHK_NULL_RETURN(commandBuffer);

    RENDER_PACKET_CHK_NULL_RETURN(m_renderHal);
    RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pMhwRenderInterface);
    RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pMhwMiInterface);
    RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pMhwRenderInterface->GetMmioRegisters());
    RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pOsInterface);
    RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pOsInterface->pOsContext);

    // These members are dereferenced further down without any other guard.
    RENDER_PACKET_CHK_NULL_RETURN(m_osInterface);
    RENDER_PACKET_CHK_NULL_RETURN(m_osInterface->pOsContext);
    RENDER_PACKET_CHK_NULL_RETURN(m_hwInterface);
    RENDER_PACKET_CHK_NULL_RETURN(m_hwInterface->GetMiInterface());
    RENDER_PACKET_CHK_NULL_RETURN(m_hwInterface->GetMiInterface()->GetMmioRegisters());
    RENDER_PACKET_CHK_NULL_RETURN(m_av1BasicFeature);

    PMOS_INTERFACE      pOsInterface    = m_renderHal->pOsInterface;
    PMHW_MI_INTERFACE   pMhwMiInterface = m_renderHal->pMhwMiInterface;
    MhwRenderInterface *pMhwRender      = m_renderHal->pMhwRenderInterface;
    MediaPerfProfiler * pPerfProfiler   = m_renderHal->pPerfProfiler;

    MHW_MEDIA_STATE_FLUSH_PARAM     FlushParam          = g_cRenderHal_InitMediaStateFlushParams;
    RENDERHAL_GENERIC_PROLOG_PARAMS GenericPrologParams = {};

    // Perf collection commands are only added by this packet when the
    // single-kernel perf flag is not set; the profiler is only needed then.
    const bool collectKernelPerf = !m_av1BasicFeature->m_singleKernelPerfFlag;
    if (collectKernelPerf)
    {
        RENDER_PACKET_CHK_NULL_RETURN(pPerfProfiler);
    }

    RENDER_PACKET_CHK_STATUS_RETURN(SetPowerMode(CODECHAl_MEDIA_STATE_AV1_FILM_GRAIN_RP1));

    // Initialize command buffer and insert prolog
    RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnInitCommandBuffer(m_renderHal, commandBuffer, &GenericPrologParams));

    HalOcaInterface::On1stLevelBBStart(*commandBuffer, *m_osInterface->pOsContext, m_osInterface->CurrentGpuContextHandle,
        *m_hwInterface->GetMiInterface(), *m_hwInterface->GetMiInterface()->GetMmioRegisters());
    HalOcaInterface::TraceMessage(*commandBuffer, *m_osInterface->pOsContext, __FUNCTION__, sizeof(__FUNCTION__));

    if (collectKernelPerf)
    {
        pOsInterface->pfnSetPerfTag(pOsInterface, ((PERFTAG_CALL_FILM_GRAIN_RP1_KERNEL << 8) | CODECHAL_DECODE_MODE_AV1VLD << 4 | m_av1BasicFeature->m_pictureCodingType));
        RENDER_PACKET_CHK_STATUS_RETURN(pPerfProfiler->AddPerfCollectStartCmd((void *)m_renderHal, pOsInterface, pMhwMiInterface, commandBuffer));
    }

    // Write timing data for 3P budget
    RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSendTimingData(m_renderHal, commandBuffer, true));

    // SLM is not enabled for this kernel (media walker first).
    RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSetCacheOverrideParams(
        m_renderHal,
        &m_renderHal->L3CacheSettings,
        false));

    // Flush media states; exactly one of the two walker parameter sets is passed.
    const bool useMediaWalker = (m_walkerType == WALKER_TYPE_MEDIA);
    RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSendMediaStates(
        m_renderHal,
        commandBuffer,
        useMediaWalker ? &m_mediaWalkerParams : nullptr,
        useMediaWalker ? nullptr : &m_gpgpuWalkerParams));

    // Write back GPU Status tag
    if (!pOsInterface->bEnableKmdMediaFrameTracking)
    {
        RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSendRcsStatusTag(m_renderHal, commandBuffer));
    }

    if (collectKernelPerf)
    {
        RENDER_PACKET_CHK_STATUS_RETURN(pPerfProfiler->AddPerfCollectEndCmd((void *)m_renderHal, pOsInterface, pMhwMiInterface, commandBuffer));
    }

    // Write timing data for 3P budget
    RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSendTimingData(m_renderHal, commandBuffer, false));

    MHW_PIPE_CONTROL_PARAMS PipeControlParams;
    MOS_ZeroMemory(&PipeControlParams, sizeof(PipeControlParams));
    PipeControlParams.dwFlushMode                   = MHW_FLUSH_WRITE_CACHE;
    PipeControlParams.bGenericMediaStateClear       = false;
    PipeControlParams.bIndirectStatePointersDisable = true;
    PipeControlParams.bDisableCSStall               = false;
    RENDER_PACKET_CHK_STATUS_RETURN(pMhwMiInterface->AddPipeControl(commandBuffer, nullptr, &PipeControlParams));

    if (MEDIA_IS_WA(m_renderHal->pWaTable, WaSendDummyVFEafterPipelineSelect))
    {
        MHW_VFE_PARAMS VfeStateParams       = {};
        VfeStateParams.dwNumberofURBEntries = 1;
        RENDER_PACKET_CHK_STATUS_RETURN(pMhwRender->AddMediaVfeCmd(commandBuffer, &VfeStateParams));
    }

    // Add media flush command in case HW not cleaning the media state
    if (MEDIA_IS_WA(m_renderHal->pWaTable, WaMSFWithNoWatermarkTSGHang))
    {
        FlushParam.bFlushToGo = true;
        if (useMediaWalker)
        {
            FlushParam.ui8InterfaceDescriptorOffset = m_mediaWalkerParams.InterfaceDescriptorOffset;
        }
        else
        {
            // The flush is still emitted below, with the default descriptor offset.
            RENDER_PACKET_ASSERTMESSAGE("ERROR, pWalkerParams is nullptr and cannot get InterfaceDescriptorOffset.");
        }
        RENDER_PACKET_CHK_STATUS_RETURN(pMhwMiInterface->AddMediaStateFlush(commandBuffer, nullptr, &FlushParam));
    }
    else if (MEDIA_IS_WA(m_renderHal->pWaTable, WaAddMediaStateFlushCmd))
    {
        RENDER_PACKET_CHK_STATUS_RETURN(pMhwMiInterface->AddMediaStateFlush(commandBuffer, nullptr, &FlushParam));
    }

    HalOcaInterface::On1stLevelBBEnd(*commandBuffer, *m_osInterface);

    // Send Batch Buffer end command (HW/OS dependent). All three conditions
    // lead to the same command; short-circuit evaluation keeps the original
    // evaluation order.
    if (pBatchBuffer ||
        IsMiBBEndNeeded(pOsInterface) ||
        pOsInterface->bNoParsingAssistanceInKmd)
    {
        RENDER_PACKET_CHK_STATUS_RETURN(pMhwMiInterface->AddMiBatchBufferEnd(commandBuffer, nullptr));
    }

    // Return unused command buffer space to OS
    pOsInterface->pfnReturnCommandBuffer(pOsInterface, commandBuffer, 0);

    MOS_NULL_RENDERING_FLAGS NullRenderingFlags = pOsInterface->pfnGetNullHWRenderFlags(pOsInterface);

    if (!NullRenderingFlags.VPLgca && !NullRenderingFlags.VPGobal)
    {
        RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pStateHeap);
        RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pStateHeap->pCurMediaState);

        // The tag is consumed (post-incremented) even when there is no batch buffer.
        const uint32_t dwSyncTag = m_renderHal->pStateHeap->dwNextTag++;

        // Set media state and batch buffer as busy
        m_renderHal->pStateHeap->pCurMediaState->bBusy = true;
        if (pBatchBuffer)
        {
            pBatchBuffer->bBusy     = true;
            pBatchBuffer->dwSyncTag = dwSyncTag;
        }
    }

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Fill m_mediaWalkerParams for the regress phase 1 kernel.
//! \details  Forces the walker type to media and initializes the media object
//!           walker parameters for a 1 x 32 resolution in dual mode with no
//!           dependency.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when the end of the function is reached.
//!
MOS_STATUS FilmGrainRp1Packet::SetupMediaWalker()
{
    DECODE_FUNC_CALL();

    DECODE_CHK_NULL(m_hwInterface);

    // Currently only the media walker is supported for film grain.
    m_walkerType = WALKER_TYPE_MEDIA;

    CODECHAL_WALKER_CODEC_PARAMS codecWalkerCfg;
    memset(&codecWalkerCfg, 0, sizeof(codecWalkerCfg));
    codecWalkerCfg.WalkerMode    = MHW_WALKER_MODE_DUAL;
    codecWalkerCfg.dwResolutionX = 1;
    codecWalkerCfg.dwResolutionY = 32;
    codecWalkerCfg.bNoDependency = true;

    DECODE_CHK_STATUS(CodecHalInitMediaObjectWalkerParams(m_hwInterface, &m_mediaWalkerParams, &codecWalkerCfg));

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Select the kernel handled by this packet.
//! \details  Stores regressPhase1 in m_kernelIndex; the index is used later to
//!           look up the kernel state, binding table count and curbe size.
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS.
//!
MOS_STATUS FilmGrainRp1Packet::Initilize()
{
    m_kernelIndex = regressPhase1;
    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Populate m_renderData with the kernel parameters and kernel entry
//!           of the regress phase 1 kernel.
//! \details  The binding table count, curbe size and kernel binary are read
//!           from the film grain feature at m_kernelIndex. The filter array
//!           and the kernel entry are cleared before being filled in.
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS.
//!
MOS_STATUS FilmGrainRp1Packet::KernelStateSetup()
{
    m_kernelCount = 1;

    // Per-kernel data owned by the film grain feature.
    MHW_KERNEL_STATE &kernelState       = m_filmGrainFeature->m_kernelStates[m_kernelIndex];
    const uint32_t    bindingTableCount = m_filmGrainFeature->m_filmGrainBindingTableCount[m_kernelIndex];
    const int32_t     curbeSize         = m_filmGrainFeature->m_filmGrainCurbeSize[m_kernelIndex];

    auto &kernelParam = m_renderData.KernelParam;
    auto &kernelEntry = m_renderData.KernelEntry;

    // Start from a clean filter array and kernel entry.
    MOS_ZeroMemory(m_filter, sizeof(m_filter));
    MOS_ZeroMemory(&kernelEntry, sizeof(Kdll_CacheEntry));

    // Kernel parameters
    kernelParam.GRF_Count          = 0;
    kernelParam.BT_Count           = bindingTableCount;
    kernelParam.Sampler_Count      = 0;
    kernelParam.Thread_Count       = m_renderHal->pMhwRenderInterface->GetHwCaps()->dwMaxThreads;
    kernelParam.GRF_Start_Register = 0;
    kernelParam.CURBE_Length       = curbeSize;
    kernelParam.block_width        = CODECHAL_MACROBLOCK_WIDTH;
    kernelParam.block_height       = CODECHAL_MACROBLOCK_HEIGHT;
    kernelParam.blocks_x           = 1;
    kernelParam.blocks_y           = 32;

    // The curbe offset is set to the size of the interface descriptor data command.
    m_renderData.iCurbeOffset = m_renderHal->pMhwStateHeap->GetSizeofCmdInterfaceDescriptorData();

    // Kernel entry
    kernelEntry.iKUID       = 0;
    kernelEntry.iKCID       = m_kernelIndex;
    kernelEntry.iFilterSize = 2;
    kernelEntry.pFilter     = m_filter;
    kernelEntry.iSize       = kernelState.KernelParams.iSize;
    kernelEntry.pBinary     = kernelState.KernelParams.pBinary;

    // Curbe / inline data lengths are reset here.
    // NOTE(review): presumably the curbe length is filled in by the later
    // curbe setup step - confirm.
    m_renderData.iInlineLength = 0;
    m_renderData.iCurbeLength  = 0;

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Set up the surface states used by the regress phase 1 kernel.
//! \details  Registers three resources of the film grain feature for HW access
//!           and records the value returned for each one in
//!           m_bindingTableIndex:
//!             - Y random values surface (input, not writable)
//!             - Y dithering temp surface (output, writable)
//!             - Y coefficients buffer (input, not writable)
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when the end of the function is reached.
//!           NOTE(review): DECODE_CHK_NULL is expected to return early with a
//!           failure status - confirm against its definition.
//!
MOS_STATUS FilmGrainRp1Packet::SetUpSurfaceState()
{
    DECODE_FUNC_CALL();

    DECODE_CHK_NULL(m_hwInterface);
    DECODE_CHK_NULL(m_filmGrainFeature);
    // The coefficient buffer pointer is dereferenced at the call site below,
    // so it must be validated before any surface is registered.
    DECODE_CHK_NULL(m_filmGrainFeature->m_yCoefficientsSurface);

    // All three resources use the same cacheability setting.
    const auto memObjCtl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_L3].Value;

    RENDERHAL_SURFACE_NEXT         renderHalSurfaceNext;
    RENDERHAL_SURFACE_STATE_PARAMS surfaceParams;

    //Y random values - input
    // NOTE(review): this read-only input is flagged as a render target - confirm this is intentional.
    bool isWritable = false;
    MOS_ZeroMemory(&renderHalSurfaceNext, sizeof(RENDERHAL_SURFACE_NEXT));
    MOS_ZeroMemory(&surfaceParams, sizeof(RENDERHAL_SURFACE_STATE_PARAMS));
    surfaceParams.MemObjCtl     = memObjCtl;
    surfaceParams.bRenderTarget = true;
    surfaceParams.Boundary      = RENDERHAL_SS_BOUNDARY_ORIGINAL;
    m_bindingTableIndex[rp1InputYRandomValue] = SetSurfaceForHwAccess(
        m_filmGrainFeature->m_yRandomValuesSurface,
        &renderHalSurfaceNext,
        &surfaceParams,
        isWritable);
    DECODE_VERBOSEMESSAGE("RP1: surface[%d] Input Y Random values index: %d\n", rp1InputYRandomValue, m_bindingTableIndex[rp1InputYRandomValue]);

    //Y dithering surface - output
    isWritable = true;
    MOS_ZeroMemory(&renderHalSurfaceNext, sizeof(RENDERHAL_SURFACE_NEXT));
    MOS_ZeroMemory(&surfaceParams, sizeof(RENDERHAL_SURFACE_STATE_PARAMS));
    surfaceParams.MemObjCtl     = memObjCtl;
    surfaceParams.bRenderTarget = true;
    surfaceParams.Boundary      = RENDERHAL_SS_BOUNDARY_ORIGINAL;
    m_bindingTableIndex[rp1OutputYDitheringSurface] = SetSurfaceForHwAccess(
        m_filmGrainFeature->m_yDitheringTempSurface,
        &renderHalSurfaceNext,
        &surfaceParams,
        isWritable);
    DECODE_VERBOSEMESSAGE("RP1: surface[%d] Output Y Dithering surface BT index: %d\n", rp1OutputYDitheringSurface, m_bindingTableIndex[rp1OutputYDitheringSurface]);

    //Y coefficients - input
    isWritable = false;
    MOS_ZeroMemory(&renderHalSurfaceNext, sizeof(RENDERHAL_SURFACE_NEXT));
    MOS_ZeroMemory(&surfaceParams, sizeof(RENDERHAL_SURFACE_STATE_PARAMS));
    surfaceParams.MemObjCtl     = memObjCtl;
    surfaceParams.bRenderTarget = false;
    surfaceParams.Boundary      = RENDERHAL_SS_BOUNDARY_ORIGINAL;
    surfaceParams.bBufferUse    = true;
    m_bindingTableIndex[rp1InputYCoeff] = SetBufferForHwAccess(
        *m_filmGrainFeature->m_yCoefficientsSurface,
        &renderHalSurfaceNext,
        &surfaceParams,
        isWritable);
    DECODE_VERBOSEMESSAGE("RP1: surface[%d] Input Y coeff BT index: %d\n", rp1InputYCoeff, m_bindingTableIndex[rp1InputYCoeff]);

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Build the curbe of the regress phase 1 kernel and hand it to
//!           SetupCurbe().
//! \details  The three curbe DWORDs carry the rp1InputYRandomValue,
//!           rp1OutputYDitheringSurface and rp1InputYCoeff indices.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when the end of the function is reached.
//!
MOS_STATUS FilmGrainRp1Packet::SetCurbeRegressPhase1()
{
    DECODE_FUNC_CALL();

    FilmGrainRegressPhase1Curbe curbe;
    curbe.DW0.YRandomValuesSurfaceIndex = rp1InputYRandomValue;
    curbe.DW1.YDitheringSurface         = rp1OutputYDitheringSurface;
    curbe.DW2.YCoeffSurface             = rp1InputYCoeff;

    // The kernel's thread count (set in KernelStateSetup) is passed along with the curbe data.
    DECODE_CHK_STATUS(SetupCurbe(
        &curbe,
        sizeof(FilmGrainRegressPhase1Curbe),
        m_renderData.KernelParam.Thread_Count));

    return MOS_STATUS_SUCCESS;
}

//!
//! \brief    Report the command buffer and patch list sizes needed by this packet.
//! \param    [out] commandBufferSize
//!           Set to the kernel load command size reported by the HW interface
//!           for the kernel's binding table count.
//! \param    [out] requestedPatchListSize
//!           Always set to 0.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when the end of the function is reached.
//!           NOTE(review): DECODE_CHK_NULL is expected to return early with a
//!           failure status when m_hwInterface is nullptr - confirm against
//!           its definition. The outputs are not written in that case.
//!
MOS_STATUS FilmGrainRp1Packet::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize)
{
    DECODE_FUNC_CALL();

    // Same guard as Prepare() and SetupMediaWalker() apply before using the HW interface.
    DECODE_CHK_NULL(m_hwInterface);

    commandBufferSize      = m_hwInterface->GetKernelLoadCommandSize(m_renderData.KernelParam.BT_Count);
    requestedPatchListSize = 0;

    return MOS_STATUS_SUCCESS;
}

}
