Poor performance on H264 decoding


Question


I'm decoding an H264 video stream using Media Foundation. It works, but performance is really poor on both Windows (desktop) and HoloLens 2 (UWP). Decoding a single 1920x1080 frame takes 30 ms on a fairly powerful PC and 100 ms on the HoloLens.

I'm wondering if it's using software decoding instead of hardware decoding. Any ideas on how to make sure that I'm doing hardware decoding, or any other hints on how to improve performance?

The code is as follows:

Decode.cpp:


#include "MFUtility.h"

#include <stdio.h>
#include <tchar.h>
#include <evr.h>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>
#include <wmcodecdsp.h>
#include <Codecapi.h>

#include <fstream>

#pragma comment(lib, "mf.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfplay.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid.lib")

#if _MSC_VER // this is defined when compiling with Visual Studio
#define EXPORT_API __declspec(dllexport) // Visual Studio requires exported functions to be annotated with this
#else
#define EXPORT_API // Xcode does not require exported functions to be annotated, so the define is empty
#endif

IMFTransform* m_pDecoderTransform = NULL; // The H.264 decoder MFT.
int _sampleCount = 0;

EXTERN_C const CLSID CLSID_CMSH264DecoderMFT;

// Link following functions C-style (required for plugins)
extern "C"
{
    void EXPORT_API DecodeFrame(unsigned char* encodedFrame, int encodedFrameLength, unsigned char* decodedBuffer)
    {
        if (m_pDecoderTransform == NULL)
        {
            CHECK_HR(MFStartup(MF_VERSION),
                "Media Foundation initialisation failed.");

            IUnknown* m_pDecTransformUnk = NULL;
            IMFMediaType* m_pDecInputMediaType = NULL, * m_pDecOutputMediaType = NULL;
            DWORD m_mftStatus = 0;

            int width = 1920;
            int height = 1080;
            int frameRate = 30;

            CHECK_HR(CoCreateInstance(CLSID_CMSH264DecoderMFT, NULL, CLSCTX_INPROC_SERVER,
                IID_IUnknown, (void**)&m_pDecTransformUnk), "Failed to create H264 decoder MFT.\n");

            CHECK_HR(m_pDecTransformUnk->QueryInterface(
                IID_PPV_ARGS(&m_pDecoderTransform)),
                "Failed to get IMFTransform interface from H264 decoder MFT object.\n");

            IMFAttributes* decoderAttributes;
            CHECK_HR(m_pDecoderTransform->GetAttributes(&decoderAttributes),
                "Can't get attributes.");

            CHECK_HR(decoderAttributes->SetUINT32(CODECAPI_AVDecVideoAcceleration_H264, TRUE),
                "Failed to enable CODECAPI_AVDecVideoAcceleration_H264");

            CHECK_HR(decoderAttributes->SetUINT32(CODECAPI_AVLowLatencyMode, TRUE),
                "Failed to enable CODECAPI_AVLowLatencyMode");

            decoderAttributes->Release();

            MFCreateMediaType(&m_pDecInputMediaType);
            m_pDecInputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
            m_pDecInputMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
            CHECK_HR(MFSetAttributeSize(m_pDecInputMediaType, MF_MT_FRAME_SIZE, width, height),
                "Failed to set image size");

            CHECK_HR(MFSetAttributeRatio(m_pDecInputMediaType, MF_MT_FRAME_RATE, frameRate, 1),
                "Failed to set frame rate on H264 MFT out type.\n");

            CHECK_HR(MFSetAttributeRatio(m_pDecInputMediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1),
                "Failed to set aspect ratio on H264 MFT out type.\n");

            CHECK_HR(m_pDecoderTransform->SetInputType(0, m_pDecInputMediaType, 0),
                "Failed to set input media type on H.264 decoder MFT.\n");

            MFCreateMediaType(&m_pDecOutputMediaType);
            m_pDecOutputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
            m_pDecOutputMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_IYUV);

            CHECK_HR(MFSetAttributeSize(m_pDecOutputMediaType, MF_MT_FRAME_SIZE, width, height),
                "Failed to set frame size on H264 MFT out type.\n");

            CHECK_HR(MFSetAttributeRatio(m_pDecOutputMediaType, MF_MT_FRAME_RATE, frameRate, 1),
                "Failed to set frame rate on H264 MFT out type.\n");

            CHECK_HR(MFSetAttributeRatio(m_pDecOutputMediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1),
                "Failed to set aspect ratio on H264 MFT out type.\n");

            CHECK_HR(m_pDecoderTransform->SetOutputType(0, m_pDecOutputMediaType, 0),
                "Failed to set output media type on H.264 decoder MFT.\n");

            CHECK_HR(m_pDecoderTransform->GetInputStatus(0, &m_mftStatus),
                "Failed to get input status from H.264 decoder MFT.\n");

            if (MFT_INPUT_STATUS_ACCEPT_DATA != m_mftStatus)
            {
                printf("H.264 decoder MFT is not accepting data.\n");
                return;
            }

            CHECK_HR(m_pDecoderTransform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL),
                "Failed to process FLUSH command on H.264 decoder MFT.\n");

            CHECK_HR(m_pDecoderTransform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL),
                "Failed to process BEGIN_STREAMING command on H.264 decoder MFT.\n");

            CHECK_HR(m_pDecoderTransform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL),
                "Failed to process START_OF_STREAM command on H.264 decoder MFT.\n");
        }

        // Start processing frames.
        IMFSample* pCopyVideoSample = NULL, * pH264DecodeOutSample = NULL;
        BOOL h264DecodeTransformFlushed = FALSE;

        IMFMediaBuffer* pDstBuffer = NULL;

        MFCreateSample(&pCopyVideoSample);
        CHECK_HR(MFCreateMemoryBuffer(encodedFrameLength, &pDstBuffer), "Failed to create memory buffer.\n");
        CHECK_HR(pCopyVideoSample->AddBuffer(pDstBuffer), "Failed to add buffer to re-constructed sample.\n");

        byte* reconByteBuffer;
        DWORD reconBuffCurrLen = 0;
        DWORD reconBuffMaxLen = 0;
        CHECK_HR(pDstBuffer->Lock(&reconByteBuffer, &reconBuffMaxLen, &reconBuffCurrLen), "Error locking recon buffer.\n");
        memcpy(reconByteBuffer, encodedFrame, encodedFrameLength);
        CHECK_HR(pDstBuffer->Unlock(), "Error unlocking recon buffer.\n");
        pDstBuffer->SetCurrentLength(encodedFrameLength);

        CHECK_HR(m_pDecoderTransform->ProcessInput(0, pCopyVideoSample, 0),
            "The H264 decoder ProcessInput call failed.");

        HRESULT getOutputResult = GetTransformOutput(m_pDecoderTransform, &pH264DecodeOutSample, &h264DecodeTransformFlushed);

        if (getOutputResult != S_OK && getOutputResult != MF_E_TRANSFORM_NEED_MORE_INPUT) {
            OutputDebugStringA("Error getting H264 decoder transform output, error code %.2X. getOutputResult\n");
            goto done;
        }

        if (h264DecodeTransformFlushed == TRUE) {
            // H264 decoder format changed. Clear the capture file and start again.
        }
        else if (pH264DecodeOutSample != NULL) {
            // Write decoded sample to capture output buffer.
            CHECK_HR(WriteSampleToBuffer(pH264DecodeOutSample, decodedBuffer),
                "Failed to write sample to buffer.");
        }

        _sampleCount++;

    done:
        // Release the per-frame COM objects so a sample and buffer are not leaked on every call.
        SAFE_RELEASE(pH264DecodeOutSample);
        SAFE_RELEASE(pCopyVideoSample);
        SAFE_RELEASE(pDstBuffer);

        return;
    }
}

MFUtility.h:


#include <stdio.h>
#include <tchar.h>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>

#include <locale>

// Evaluate the HRESULT expression only once so a failing call is not invoked a second time while the error is formatted.
#define CHECK_HR(hr, msg) do { HRESULT hr_ = (hr); if (hr_ != S_OK) { OutputDebugStringA(msg); char msgBuf[1024]; sprintf_s(msgBuf, "ErrorCode: %.2X.\n", hr_); OutputDebugStringA(msgBuf); exit(99); } } while (0)


template <class T> void SAFE_RELEASE(T** ppT)
{
    if (*ppT)
    {
        (*ppT)->Release();
        *ppT = NULL;
    }
}

template <class T> inline void SAFE_RELEASE(T*& pT)
{
    if (pT != NULL)
    {
        pT->Release();
        pT = NULL;
    }
}


HRESULT WriteSampleToBuffer(IMFSample* pSample, unsigned char* decodedBuffer)
{
    IMFMediaBuffer* buf = NULL;
    DWORD bufLength;

    HRESULT hr = S_OK;

    hr = pSample->ConvertToContiguousBuffer(&buf);
    CHECK_HR(hr, "ConvertToContiguousBuffer failed.");

    hr = buf->GetCurrentLength(&bufLength);
    CHECK_HR(hr, "Get buffer length failed.");

    byte* byteBuffer = NULL;
    DWORD buffMaxLen = 0, buffCurrLen = 0;
    hr = buf->Lock(&byteBuffer, &buffMaxLen, &buffCurrLen);
    CHECK_HR(hr, "Failed to lock sample buffer.");

    memcpy(decodedBuffer, byteBuffer, bufLength);

    hr = buf->Unlock();
    CHECK_HR(hr, "Failed to unlock sample buffer.");

    SAFE_RELEASE(buf);

    return hr;
}

/**
* Creates a new single-buffer media sample.
* @param[in] bufferSize: size of the media buffer to set on the created media sample.
* @param[out] pSample: pointer to the created single-buffer media sample.
* @returns S_OK if successful or an error code if not.
*/
HRESULT CreateSingleBufferIMFSample(DWORD bufferSize, IMFSample** pSample)
{
    IMFMediaBuffer* pBuffer = NULL;

    HRESULT hr = S_OK;

    hr = MFCreateSample(pSample);
    CHECK_HR(hr, "Failed to create MF sample.");

    // Adds a ref count to the pBuffer object.
    hr = MFCreateMemoryBuffer(bufferSize, &pBuffer);
    CHECK_HR(hr, "Failed to create memory buffer.");

    // Adds another ref count to the pBuffer object.
    hr = (*pSample)->AddBuffer(pBuffer);
    CHECK_HR(hr, "Failed to add sample to buffer.");

    // Leave the single ref count that will be removed when the pSample is released.
    SAFE_RELEASE(pBuffer);
    return hr;
}



/**
* Attempts to get an output sample from an MFT transform.
* @param[in] pTransform: pointer to the media transform to apply.
* @param[out] pOutSample: pointer to the media sample output by the transform. Can be NULL
*  if the transform did not produce one.
* @param[out] transformFlushed: set to TRUE if the transform format changed and its
*  contents were flushed; the output format of the sample most likely changed.
* @returns S_OK if successful or an error code if not.
*/
HRESULT GetTransformOutput(IMFTransform* pTransform, IMFSample** pOutSample, BOOL* transformFlushed)
{
    MFT_OUTPUT_STREAM_INFO StreamInfo = { 0 };
    MFT_OUTPUT_DATA_BUFFER outputDataBuffer = { 0 };
    DWORD processOutputStatus = 0;
    IMFMediaType* pChangedOutMediaType = NULL;

    HRESULT hr = S_OK;
    *transformFlushed = FALSE;

    hr = pTransform->GetOutputStreamInfo(0, &StreamInfo);
    CHECK_HR(hr, "Failed to get output stream info from MFT.");

    outputDataBuffer.dwStreamID = 0;
    outputDataBuffer.dwStatus = 0;
    outputDataBuffer.pEvents = NULL;

    if ((StreamInfo.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) == 0) {
        hr = CreateSingleBufferIMFSample(StreamInfo.cbSize, pOutSample);
        CHECK_HR(hr, "Failed to create new single buffer IMF sample.");
        outputDataBuffer.pSample = *pOutSample;
    }

    auto mftProcessOutput = pTransform->ProcessOutput(0, 1, &outputDataBuffer, &processOutputStatus);

    if (mftProcessOutput == S_OK) {
        // Sample is ready and allocated on the transform output buffer.
        *pOutSample = outputDataBuffer.pSample;
    }
    else if (mftProcessOutput == MF_E_TRANSFORM_STREAM_CHANGE) {
        // Format of the input stream has changed. https://docs.microsoft.com/en-us/windows/win32/medfound/handling-stream-changes
        if (outputDataBuffer.dwStatus == MFT_OUTPUT_DATA_BUFFER_FORMAT_CHANGE) {
            hr = pTransform->GetOutputAvailableType(0, 0, &pChangedOutMediaType);
            CHECK_HR(hr, "Failed to get the MFT ouput media type after a stream change.");

            hr = pChangedOutMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_IYUV);
            CHECK_HR(hr, "Failed to set media sub type.");

            hr = pTransform->SetOutputType(0, pChangedOutMediaType, 0);
            CHECK_HR(hr, "Failed to set new output media type on MFT.");

            hr = pTransform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL);
            CHECK_HR(hr, "Failed to process FLUSH command on MFT.");

            *transformFlushed = TRUE;
        }
        else {
            hr = E_NOTIMPL;
        }

        SAFE_RELEASE(pOutSample);
        *pOutSample = NULL;
    }
    else if (mftProcessOutput == MF_E_TRANSFORM_NEED_MORE_INPUT) {
        // More input is not an error condition but it means the allocated output sample is empty.
        SAFE_RELEASE(pOutSample);
        *pOutSample = NULL;
        hr = MF_E_TRANSFORM_NEED_MORE_INPUT;
    }
    else {
        hr = mftProcessOutput;
        SAFE_RELEASE(pOutSample);
        *pOutSample = NULL;
    }

    SAFE_RELEASE(pChangedOutMediaType);

    return hr;
}


Answer 1:


To get the best performance, you need to provide a DirectX surface to the decoder:

MFCreateDXSurfaceBuffer function

This is a specialized IMFMediaBuffer:

DirectX Surface Buffer
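
For the Direct3D 11 path that UWP/HoloLens uses, the DXGI counterpart of MFCreateDXSurfaceBuffer (which takes Direct3D 9 surfaces) is MFCreateDXGISurfaceBuffer. Below is a minimal, hedged sketch of wrapping an existing ID3D11Texture2D in a media buffer and attaching it to a sample; the texture here is a hypothetical one that would come from your rendering side, typically created as DXGI_FORMAT_NV12.

// Sketch only: wrap an existing D3D11 texture in an IMFMediaBuffer so the
// decoder can write straight into GPU memory. pTexture is hypothetical and
// would be created/shared by the rendering side (typically DXGI_FORMAT_NV12).
#include <d3d11.h>
#include <mfapi.h>

HRESULT CreateGpuBackedSample(ID3D11Texture2D* pTexture, IMFSample** ppSample)
{
    IMFMediaBuffer* pBuffer = NULL;

    HRESULT hr = MFCreateSample(ppSample);
    if (FAILED(hr)) return hr;

    // Wrap subresource 0 of the texture; the buffer holds a reference to it.
    hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), pTexture, 0, FALSE, &pBuffer);
    if (FAILED(hr)) { (*ppSample)->Release(); *ppSample = NULL; return hr; }

    hr = (*ppSample)->AddBuffer(pBuffer);
    pBuffer->Release();
    return hr;
}

Note that once the decoder is running in D3D-aware mode it typically sets MFT_OUTPUT_STREAM_PROVIDES_SAMPLES and allocates its own GPU-backed output samples, in which case your GetTransformOutput above would receive the texture-backed sample directly instead of needing a wrapper like this.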

Your DirectX surface will come from Unity3D, depending on your use case, and will have to be compatible. I also think you will need to maintain a pool of DirectX surfaces to synchronize decoding and rendering, as sketched below.
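
Purely as an illustration of that pooling idea (all names here are hypothetical, not a Media Foundation or Unity API), a small round-robin ring of pre-allocated textures could look like this; a real implementation also needs synchronisation so a texture is not reused while the renderer is still reading it.

// Hypothetical sketch of a surface pool: a fixed ring of pre-created textures
// that the decoder writes into and the renderer reads from, cycled round-robin.
#include <vector>
#include <d3d11.h>

struct SurfacePool
{
    std::vector<ID3D11Texture2D*> textures; // pre-allocated decode targets (e.g. NV12)
    size_t next = 0;

    ID3D11Texture2D* Acquire()
    {
        // Hand out the next texture in the ring; the caller must make sure the
        // renderer has finished with it (fence/keyed mutex) before it is reused.
        ID3D11Texture2D* tex = textures[next];
        next = (next + 1) % textures.size();
        return tex;
    }
};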

I can't guarantee everything will work as expected, because of the interop between Unity3D and Media Foundation.

What is certain, in terms of performance, is that the decoded frame must stay in GPU memory until it is rendered.

In your current code, I suspect the decoded frame goes back to system memory and then back to GPU memory before rendering, which is not optimal. You might even get better performance with software decoding, because it would avoid this ping-pong between system memory and GPU memory. Keep in mind that Unity3D already uses a lot of GPU bandwidth.
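
To answer the "how do I make sure I'm doing hardware decoding" part: you can check whether the MFT reports itself as Direct3D 11 aware and, if it does, hand it a DXGI device manager before streaming starts, so it decodes into GPU memory. This is a hedged sketch; pD3D11Device is an assumption and would be the device shared with the rendering side, created with the D3D11_CREATE_DEVICE_VIDEO_SUPPORT flag (and with multithread protection enabled on it).

// Sketch, assuming m_pDecoderTransform is the decoder MFT created above and
// pD3D11Device was created with D3D11_CREATE_DEVICE_VIDEO_SUPPORT.
#include <d3d11.h>
#include <mfapi.h>
#include <mftransform.h>

HRESULT EnableHardwareDecoding(IMFTransform* pDecoder, ID3D11Device* pD3D11Device)
{
    IMFAttributes* pAttributes = NULL;
    HRESULT hr = pDecoder->GetAttributes(&pAttributes);
    if (FAILED(hr)) return hr;

    // If MF_SA_D3D11_AWARE is absent or 0, the MFT cannot use D3D11 at all and
    // every decoded frame will land in system memory.
    UINT32 d3d11Aware = 0;
    pAttributes->GetUINT32(MF_SA_D3D11_AWARE, &d3d11Aware);
    pAttributes->Release();
    if (!d3d11Aware) return E_NOTIMPL;

    // Give the decoder a DXGI device manager bound to the rendering device so
    // its output samples can stay in GPU memory.
    UINT resetToken = 0;
    IMFDXGIDeviceManager* pDeviceManager = NULL;
    hr = MFCreateDXGIDeviceManager(&resetToken, &pDeviceManager);
    if (FAILED(hr)) return hr;

    hr = pDeviceManager->ResetDevice(pD3D11Device, resetToken);
    if (SUCCEEDED(hr))
        hr = pDecoder->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, (ULONG_PTR)pDeviceManager);

    pDeviceManager->Release(); // the MFT keeps its own reference if it accepted the manager
    return hr;
}

If the MFT rejects MFT_MESSAGE_SET_D3D_MANAGER (or never receives it), it falls back to software decoding, which could explain frame times in the range you are seeing.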



Source: https://stackoverflow.com/questions/60061565/poor-performance-on-h264-decoding
