Decoding and Displaying an H.264 IDR Frame with D3D12

This single frame can only be an IDR frame. As for P and B frames, I still do not know how to set up the reference parameters they require; reportedly you also have to arrange the display order yourself from pic_order_cnt and frame_num, which is beyond me. For real commercial work there are FFmpeg and Intel VAAPI, plus NVIDIA's GStreamer plugins, Mesa Gallium drivers and Microsoft Media Foundation, so there is no point showing off in front of the experts; decoding an IDR frame with native D3D12 here is purely for my own amusement. First, initialize COM in the project's App class with CoInitializeEx(nullptr,COINIT_APARTMENTTHREADED) (a minimal sketch of this follows), then initialize the D3D12 environment. For ease of understanding, everything is written in a single menu handler, whose code begins right after the sketch:
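A minimal sketch of that COM initialization, assuming an MFC CWinApp-derived application class (the class name CDecodeApp is purely illustrative):

BOOL CDecodeApp::InitInstance()
{
    CWinApp::InitInstance();

    //apartment-threaded COM is assumed to be enough for this decoding experiment
    if(FAILED(CoInitializeEx(nullptr,COINIT_APARTMENTTHREADED)))
    {
        return FALSE;
    }

    //...the usual MFC frame/document setup continues here
    return TRUE;
}

int CDecodeApp::ExitInstance()
{
    CoUninitialize();
    return CWinApp::ExitInstance();
}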

    RECT rc={0};
    GetClientRect(&rc);

    UINT width(rc.right-rc.left),height(rc.bottom-rc.top);
    HWND hMainWnd(GetSafeHwnd());
    HRESULT hr(E_INVALIDARG);

    CComPtr<ID3D12Debug3> pD3DxDebug(nullptr);
    hr=D3D12GetDebugInterface(IID_PPV_ARGS(&pD3DxDebug));
    pD3DxDebug->SetEnableGPUBasedValidation(1);
    pD3DxDebug->EnableDebugLayer();

    CComPtr<ID3D12Device8> pD3DxDev(nullptr);
    hr=D3D12CreateDevice(nullptr,D3D_FEATURE_LEVEL_12_1,IID_PPV_ARGS(&pD3DxDev));

//    CComQIPtr<ID3D12DebugDevice> pD3DxDbgDev(pD3DxDev);
//    hr=pD3DxDbgDev->ReportLiveDeviceObjects(D3D12_RLDO_DETAIL);

    CComPtr<IDXGIFactory7> pDXGIxFact(nullptr);
    hr=CreateDXGIFactory2(0,IID_PPV_ARGS(&pDXGIxFact));

The commented-out lines are there for inspecting debug output; the same applies below. Next, create three kinds of command queues. First, the ordinary direct command queue:

    D3D12_COMMAND_QUEUE_DESC CmdQue{};
    CmdQue.Priority=D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
    CmdQue.Type=D3D12_COMMAND_LIST_TYPE_DIRECT;
    CmdQue.Flags=D3D12_COMMAND_QUEUE_FLAG_NONE;

    CComPtr<ID3D12CommandQueue> pD3DxCmdQue(nullptr);
    hr=pD3DxDev->CreateCommandQueue(&CmdQue,IID_PPV_ARGS(&pD3DxCmdQue));

    CComPtr<ID3D12CommandAllocator> pCmdAlloc(nullptr);
    hr=pD3DxDev->CreateCommandAllocator(CmdQue.Type,IID_PPV_ARGS(&pCmdAlloc));

    CComPtr<ID3D12GraphicsCommandList6> pCmdList(nullptr);
    hr=pD3DxDev->CreateCommandList(0,CmdQue.Type,pCmdAlloc,nullptr,IID_PPV_ARGS(&pCmdList));

Second, the video processing command queue:
    D3D12_COMMAND_QUEUE_DESC CmdQueVP{};
    CmdQueVP.Priority=D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
    CmdQueVP.Type=D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS;
    CmdQueVP.Flags=D3D12_COMMAND_QUEUE_FLAG_NONE;

    CComPtr<ID3D12CommandQueue> pD3DxVpsQue(nullptr);
    hr=pD3DxDev->CreateCommandQueue(&CmdQueVP,IID_PPV_ARGS(&pD3DxVpsQue));

    CComPtr<ID3D12CommandAllocator> pVpsAlloc(nullptr);
    hr=pD3DxDev->CreateCommandAllocator(CmdQueVP.Type,IID_PPV_ARGS(&pVpsAlloc));

    CComPtr<ID3D12VideoProcessCommandList2> pVpsCmdList(nullptr);
    hr=pD3DxDev->CreateCommandList(0,CmdQueVP.Type,pVpsAlloc,nullptr,IID_PPV_ARGS(&pVpsCmdList));

Finally, the video decode command queue:
    D3D12_COMMAND_QUEUE_DESC CmdQueVD{};
    CmdQueVD.Priority=D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
    CmdQueVD.Type=D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE;
    CmdQueVD.Flags=D3D12_COMMAND_QUEUE_FLAG_NONE;

    CComPtr<ID3D12CommandQueue> pD3DxVdeQue(nullptr);
    hr=pD3DxDev->CreateCommandQueue(&CmdQueVD,IID_PPV_ARGS(&pD3DxVdeQue));

    CComPtr<ID3D12CommandAllocator> pVdeAlloc(nullptr);
    hr=pD3DxDev->CreateCommandAllocator(CmdQueVD.Type,IID_PPV_ARGS(&pVdeAlloc));

    CComPtr<ID3D12VideoDecodeCommandList2> pVdeCmdList(nullptr);
    hr=pD3DxDev->CreateCommandList(0,CmdQueVD.Type,pVdeAlloc,nullptr,IID_PPV_ARGS(&pVdeCmdList));

Initialization continues: create the fence, the swap chain, two kinds of descriptor heaps, the root signature, a static sampler, the rendering shaders, the graphics pipeline, the display quad, and the depth-stencil buffer (not strictly necessary if you only display a flat video image). Up to this point:

    CComPtr<ID3D12Fence1> pD3DxFence(nullptr);
    hr=pD3DxDev->CreateFence(0,D3D12_FENCE_FLAG_NONE,IID_PPV_ARGS(&pD3DxFence));

    DXGI_SWAP_CHAIN_DESC1 scd{};
    scd.Width=width,scd.Height=height;
    scd.BufferUsage=DXGI_USAGE_RENDER_TARGET_OUTPUT;
    scd.SwapEffect=DXGI_SWAP_EFFECT_FLIP_DISCARD;
    scd.Format=DXGI_FORMAT_R8G8B8A8_UNORM;
    scd.AlphaMode=DXGI_ALPHA_MODE_IGNORE;
    scd.Scaling=DXGI_SCALING_STRETCH;
    scd.SampleDesc.Quality=0;
    scd.SampleDesc.Count=1;
    scd.BufferCount=2;
    scd.Stereo=0;
    scd.Flags=0;

    CComPtr<IDXGISwapChain1> pRelaySwap(nullptr);
    hr=pDXGIxFact->CreateSwapChainForHwnd(pD3DxCmdQue,hMainWnd,&scd,nullptr,nullptr,&pRelaySwap);
    CComQIPtr<IDXGISwapChain4> pSwapChain(pRelaySwap);

    D3D12_DESCRIPTOR_HEAP_DESC RtvDesc{};
    RtvDesc.NumDescriptors=scd.BufferCount;
    RtvDesc.Type=D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
    RtvDesc.Flags=D3D12_DESCRIPTOR_HEAP_FLAG_NONE;

    CComPtr<ID3D12DescriptorHeap> pRtvHeap(nullptr);
    hr=pD3DxDev->CreateDescriptorHeap(&RtvDesc,IID_PPV_ARGS(&pRtvHeap));
    UINT RtvDescSize(pD3DxDev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV));

    CD3DX12_CPU_DESCRIPTOR_HANDLE hRtvDesc(pRtvHeap->GetCPUDescriptorHandleForHeapStart());
    vector<CComPtr<ID3D12Resource2>> RescRndr(scd.BufferCount);

    for(UINT idx=0;idx<scd.BufferCount;++idx)
    {
        hr=pSwapChain->GetBuffer(idx,IID_PPV_ARGS(&RescRndr[idx]));
        pD3DxDev->CreateRenderTargetView(RescRndr[idx],nullptr,hRtvDesc);
        hRtvDesc.Offset(1,RtvDescSize);
    }

    D3D12_DESCRIPTOR_HEAP_DESC DsvDesc{};
    DsvDesc.NumDescriptors=1;
    DsvDesc.Type=D3D12_DESCRIPTOR_HEAP_TYPE_DSV;
    DsvDesc.Flags=D3D12_DESCRIPTOR_HEAP_FLAG_NONE;

    CComPtr<ID3D12DescriptorHeap> pDsvHeap(nullptr);
    hr=pD3DxDev->CreateDescriptorHeap(&DsvDesc,IID_PPV_ARGS(&pDsvHeap));
    UINT DsvDescSize(pD3DxDev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV));

    D3D12_DESCRIPTOR_HEAP_DESC CSUxvDesc{};
    CSUxvDesc.NumDescriptors=1;
    CSUxvDesc.Type=D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    CSUxvDesc.Flags=D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;

    CComPtr<ID3D12DescriptorHeap> pCSUxvHeap(nullptr);
    hr=pD3DxDev->CreateDescriptorHeap(&CSUxvDesc,IID_PPV_ARGS(&pCSUxvHeap));
    UINT CSUxvDescSize(pD3DxDev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV));

    vector<CD3DX12_DESCRIPTOR_RANGE1> RngTab(1);
    RngTab[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV,1,0,0);

    vector<CD3DX12_ROOT_PARAMETER1> RootParm(1);
    RootParm[0].InitAsDescriptorTable(1,&RngTab[0],D3D12_SHADER_VISIBILITY_PIXEL);
    
    D3D12_FEATURE_DATA_ROOT_SIGNATURE SignVer{};
    SignVer.HighestVersion=D3D_ROOT_SIGNATURE_VERSION_1_1;

    if(FAILED(pD3DxDev->CheckFeatureSupport(D3D12_FEATURE_ROOT_SIGNATURE,&SignVer,sizeof(SignVer))))
    {
        SignVer.HighestVersion=D3D_ROOT_SIGNATURE_VERSION_1_0;
    }

    D3D12_STATIC_SAMPLER_DESC SmplDesc{};
    SmplDesc.BorderColor=D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK;
    SmplDesc.ShaderVisibility=D3D12_SHADER_VISIBILITY_PIXEL;
    SmplDesc.ComparisonFunc=D3D12_COMPARISON_FUNC_ALWAYS;
    SmplDesc.AddressU=D3D12_TEXTURE_ADDRESS_MODE_WRAP;
    SmplDesc.AddressV=D3D12_TEXTURE_ADDRESS_MODE_WRAP;
    SmplDesc.AddressW=D3D12_TEXTURE_ADDRESS_MODE_WRAP;
    SmplDesc.Filter=D3D12_FILTER_ANISOTROPIC;
    SmplDesc.MaxLOD=D3D12_FLOAT32_MAX;
    SmplDesc.MaxAnisotropy=16;
    SmplDesc.ShaderRegister=0;
    SmplDesc.RegisterSpace=0;
    SmplDesc.MipLODBias=0;
    SmplDesc.MinLOD=0.f;

    vector<D3D12_STATIC_SAMPLER_DESC> SmpList{SmplDesc};
    CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC RootSign{};
    RootSign.Init_1_1(LODWORD(RootParm.size()),&RootParm[0],LODWORD(SmpList.size()),&SmpList[0],
        D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);

    CComPtr<ID3DBlob> pBlobSign(nullptr),pBlobErr(nullptr);
    hr=D3DX12SerializeVersionedRootSignature(&RootSign,SignVer.HighestVersion,&pBlobSign,&pBlobErr);

    CComPtr<ID3D12RootSignature> pRootSign(nullptr);
    hr=pD3DxDev->CreateRootSignature(0,pBlobSign->GetBufferPointer(),
        pBlobSign->GetBufferSize(),IID_PPV_ARGS(&pRootSign));

    CComPtr<ID3DBlob> pVShader(nullptr);
    hr=D3DCompileFromFile(L"shadervp.hlsl",nullptr,nullptr,"VSMain","vs_5_1",
        D3DCOMPILE_DEBUG|D3DCOMPILE_SKIP_OPTIMIZATION,0,&pVShader,nullptr);

    CComPtr<ID3DBlob> pPShader(nullptr);
    hr=D3DCompileFromFile(L"shadervp.hlsl",nullptr,nullptr,"PSMain","ps_5_1",
        D3DCOMPILE_DEBUG|D3DCOMPILE_SKIP_OPTIMIZATION,0,&pPShader,nullptr);

    vector<D3D12_INPUT_ELEMENT_DESC> ied
    {
        {"POSITION",0,DXGI_FORMAT_R32G32B32_FLOAT,0,offsetof(D3DVertex,pos),D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,0},
        {"NORMAL",0,DXGI_FORMAT_R32G32B32_FLOAT,0,offsetof(D3DVertex,nml),D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,0},
        {"TEXCOORD",0,DXGI_FORMAT_R32G32_FLOAT,0,offsetof(D3DVertex,tex),D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,0}
    };

    D3D12_GRAPHICS_PIPELINE_STATE_DESC GpsDesc{};
    GpsDesc.PrimitiveTopologyType=D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
    GpsDesc.DepthStencilState=CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT);
    GpsDesc.RasterizerState=CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
    GpsDesc.BlendState=CD3DX12_BLEND_DESC(D3D12_DEFAULT);
    GpsDesc.InputLayout={ied.data(),LODWORD(ied.size())};
    GpsDesc.RTVFormats[0]=DXGI_FORMAT_R8G8B8A8_UNORM;
    GpsDesc.DSVFormat=DXGI_FORMAT_D24_UNORM_S8_UINT;
    GpsDesc.VS=CD3DX12_SHADER_BYTECODE(pVShader);
    GpsDesc.PS=CD3DX12_SHADER_BYTECODE(pPShader);
    GpsDesc.pRootSignature=pRootSign;
    GpsDesc.SampleDesc.Quality=0;
    GpsDesc.SampleDesc.Count=1;
    GpsDesc.SampleMask=UINT_MAX;
    GpsDesc.NumRenderTargets=1;

    CComPtr<ID3D12PipelineState> pPipeStat(nullptr);
    hr=pD3DxDev->CreateGraphicsPipelineState(&GpsDesc,IID_PPV_ARGS(&pPipeStat));

    vector<D3DVertex> QuadVtx //quad vertex data
    {
        {{-1.f,-1.f,0.f},{0.f,0.f,-1.f},{0.f,1.f}},
        {{-1.f,1.f,0.f},{0.f,0.f,-1.f},{0.f,0.f}},
        {{1.f,1.f,0.f},{0.f,0.f,-1.f},{1.f,0.f}},
        {{-1.f,-1.f,0.f},{0.f,0.f,-1.f},{0.f,1.f}},
        {{1.f,1.f,0.f},{0.f,0.f,-1.f},{1.f,0.f}},
        {{1.f,-1.f,0.f},{0.f,0.f,-1.f},{1.f,1.f}}
    };

    vector<UINT> VtxNum(1,LODWORD(QuadVtx.size()));
    vector<UINT> VtxByteLen(1,VtxNum[0]*sizeof(D3DVertex));

    shared_ptr<UploadBuffer<D3DVertex>> pUplQuad(nullptr);
    pUplQuad=make_shared<UploadBuffer<D3DVertex>>(pD3DxDev,VtxNum[0],0);
    CopyMemory(pUplQuad->RetMapPin(),&QuadVtx[0],VtxByteLen[0]);

    vector<shared_ptr<UploadBuffer<D3DVertex>>> UplBuf{pUplQuad};
    vector<D3D12_VERTEX_BUFFER_VIEW> vbv(UplBuf.size());

    for(UINT idx=0;idx<vbv.size();++idx)
    {
        BindVtxBufView(UplBuf[idx]->RetUploadInst(),vbv[idx],sizeof(D3DVertex),VtxByteLen[idx]);
    }

    D3D12_HEAP_PROPERTIES DepthProp{};
    DepthProp.CPUPageProperty=D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    DepthProp.MemoryPoolPreference=D3D12_MEMORY_POOL_UNKNOWN;
    DepthProp.Type=D3D12_HEAP_TYPE_DEFAULT;

    D3D12_RESOURCE_DESC DepthRes{};
    DepthRes.Width=width,DepthRes.Height=height;
    DepthRes.Flags=D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
    DepthRes.Dimension=D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    DepthRes.Format=DXGI_FORMAT_D24_UNORM_S8_UINT;
    DepthRes.Layout=D3D12_TEXTURE_LAYOUT_UNKNOWN;
    DepthRes.SampleDesc.Quality=0;
    DepthRes.SampleDesc.Count=1;
    DepthRes.DepthOrArraySize=1;
    DepthRes.MipLevels=1;

    D3D12_CLEAR_VALUE ClrOpt{};
    ClrOpt.Format=DXGI_FORMAT_D24_UNORM_S8_UINT;
    ClrOpt.DepthStencil.Depth=1.f;
    ClrOpt.DepthStencil.Stencil=0;

    CComPtr<ID3D12Resource2> pDepthBuf(nullptr);
    hr=pD3DxDev->CreateCommittedResource(&DepthProp,D3D12_HEAP_FLAG_NONE,&DepthRes,
        D3D12_RESOURCE_STATE_DEPTH_WRITE,&ClrOpt,IID_PPV_ARGS(&pDepthBuf));

    D3D12_DEPTH_STENCIL_VIEW_DESC DsvView{};
    DsvView.ViewDimension=D3D12_DSV_DIMENSION_TEXTURE2D;
    DsvView.Format=DXGI_FORMAT_D24_UNORM_S8_UINT;
    DsvView.Flags=D3D12_DSV_FLAG_NONE;
    DsvView.Texture2D.MipSlice=0;

    D3D12_CPU_DESCRIPTOR_HANDLE hDsvDesc(pDsvHeap->GetCPUDescriptorHandleForHeapStart());
    pD3DxDev->CreateDepthStencilView(pDepthBuf,&DsvView,hDsvDesc);

Note that the quad's vertical texture coordinate is flipped; that is because the decoded RGB data will come out that way. The offsetof offsets can be replaced with D3D12_APPEND_ALIGNED_ELEMENT to the same effect (a sketch follows). You also need to download the latest d3dx12.h, otherwise the root signature's Init_1_1 call will not compile. Now to the main topic: the first step, shown right after the sketch, is to check which video format conversions the system supports:
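For reference, here is a sketch of the same input layout written with D3D12_APPEND_ALIGNED_ELEMENT instead of offsetof; it is equivalent as long as the elements are listed in the same order as the struct members (ied2 is just an illustrative name):

    vector<D3D12_INPUT_ELEMENT_DESC> ied2
    {
        {"POSITION",0,DXGI_FORMAT_R32G32B32_FLOAT,0,D3D12_APPEND_ALIGNED_ELEMENT,D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,0},
        {"NORMAL",0,DXGI_FORMAT_R32G32B32_FLOAT,0,D3D12_APPEND_ALIGNED_ELEMENT,D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,0},
        {"TEXCOORD",0,DXGI_FORMAT_R32G32_FLOAT,0,D3D12_APPEND_ALIGNED_ELEMENT,D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,0}
    };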

    CComQIPtr<ID3D12VideoDevice> pD3DxVid(pD3DxDev);
    UINT PicWid(1920),PicHit(1088),FrmRate(30);
    const UINT DXVA_STAT(512);

    D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT FeaSupt
    {
        0,
        {PicWid,PicHit,{DXGI_FORMAT_NV12,DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709}},
        D3D12_VIDEO_FIELD_TYPE_NONE,
        D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
        {FrmRate,1},
        {DXGI_FORMAT_R8G8B8A8_UNORM,DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709},
        D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
        {FrmRate,1}
    };

    hr=pD3DxVid->CheckFeatureSupport(D3D12_FEATURE_VIDEO_PROCESS_SUPPORT,&FeaSupt,sizeof(FeaSupt));
    if(!(FeaSupt.SupportFlags&D3D12_VIDEO_PROCESS_SUPPORT_FLAG_SUPPORTED))
    {
        OutputDebugPrintf("%s\n","unsupported format conversion: DXGI_FORMAT_NV12 -> DXGI_FORMAT_R8G8B8A8_UNORM");
        return;
    }

Then create the video processor:
    D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC VPinDesc{};
    VPinDesc.DestinationSizeRange=D3D12_VIDEO_SIZE_RANGE{PicWid,PicHit,128,72};
    VPinDesc.SourceSizeRange=D3D12_VIDEO_SIZE_RANGE{PicWid,PicHit,128,72};
    VPinDesc.ColorSpace=DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709;
    VPinDesc.StereoFormat=D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE;
    VPinDesc.DestinationAspectRatio=DXGI_RATIONAL(16,9);
    VPinDesc.SourceAspectRatio=DXGI_RATIONAL(16,9);
    VPinDesc.FrameRate=DXGI_RATIONAL(FrmRate,1);
    VPinDesc.Format=DXGI_FORMAT_NV12;
    VPinDesc.EnableOrientation=1;

    D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC VPoutDesc{};
    VPoutDesc.AlphaFillMode=D3D12_VIDEO_PROCESS_ALPHA_FILL_MODE_OPAQUE;
    VPoutDesc.ColorSpace=DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709;
    VPoutDesc.Format=DXGI_FORMAT_R8G8B8A8_UNORM;
    VPoutDesc.FrameRate=DXGI_RATIONAL(FrmRate,1);
    VPoutDesc.EnableStereo=0;

    CComPtr<ID3D12VideoProcessor> pD3DxVidPrcs(nullptr);
    hr=pD3DxVid->CreateVideoProcessor(0,&VPoutDesc,1,&VPinDesc,IID_PPV_ARGS(&pD3DxVidPrcs));

Next, create the video decoder and the decoder heap; an optional capability check is sketched right after this snippet:
    D3D12_VIDEO_DECODER_DESC VDecDesc{};
    VDecDesc.Configuration.InterlaceType=D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE;
    VDecDesc.Configuration.BitstreamEncryption=D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE;
    VDecDesc.Configuration.DecodeProfile=D3D12_VIDEO_DECODE_PROFILE_H264;
    VDecDesc.NodeMask=0;

    CComPtr<ID3D12VideoDecoder> pD3DxVidDec(nullptr);
    hr=pD3DxVid->CreateVideoDecoder(&VDecDesc,IID_PPV_ARGS(&pD3DxVidDec));

    D3D12_VIDEO_DECODER_HEAP_DESC VDeapDesc{};
    VDeapDesc.DecodeWidth=PicWid,VDeapDesc.DecodeHeight=PicHit;
    VDeapDesc.Configuration=VDecDesc.Configuration;
    VDeapDesc.MaxDecodePictureBufferCount=16;
    VDeapDesc.FrameRate=DXGI_RATIONAL(FrmRate,1);
    VDeapDesc.Format=VPinDesc.Format;
    VDeapDesc.BitRate=720*(1<<8);
    VDeapDesc.NodeMask=0;

    CComPtr<ID3D12VideoDecoderHeap> pD3DxViDHeap(nullptr);
    hr=pD3DxVid->CreateVideoDecoderHeap(&VDeapDesc,IID_PPV_ARGS(&pD3DxViDHeap));
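As an aside, before relying on the decoder it does no harm to ask the driver whether it actually supports H.264 decode at this resolution, in the same spirit as the video-process check above. A minimal sketch of that optional query (not part of the original flow):

    D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT DecSupt{};
    DecSupt.Configuration=VDecDesc.Configuration; //same H.264 profile as the decoder above
    DecSupt.DecodeFormat=DXGI_FORMAT_NV12;
    DecSupt.FrameRate={FrmRate,1};
    DecSupt.Width=PicWid;
    DecSupt.Height=PicHit;
    DecSupt.NodeIndex=0;
    DecSupt.BitRate=0;

    hr=pD3DxVid->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT,&DecSupt,sizeof(DecSupt));
    if(FAILED(hr)||!(DecSupt.SupportFlags&D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED))
    {
        OutputDebugPrintf("%s\n","H.264 decode is not supported at this resolution");
        return;
    }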

You can also configure a query heap to feed back the decode status. It is not required; unless you deliberately abuse the decoder, you will not see anything of value in it:

    UINT StatInfoLen(sizeof(D3D12_QUERY_DATA_VIDEO_DECODE_STATISTICS));
    CComPtr<ID3D12Resource> pReadBack(nullptr),pQueryInfo(nullptr);
    CComPtr<ID3D12QueryHeap> pQueryHeap(nullptr);

    //create the decode-statistics query resources
    hr=QueryDecodeStatus(pD3DxDev,pQueryHeap.p,pReadBack.p,pQueryInfo.p,StatInfoLen,DXVA_STAT);

Next, some preparation before the actual decode: load the IDR frame from disk, extract the NALU data, fill in the three data structures (DXVA_PicParams_H264, DXVA_Qmatrix_H264 and DXVA_Slice_H264_Short), and upload the bitstream resource:
    vector<BYTE> RawSlc(0);
    string file("f:/movie/zSlice0.raw");
    ReadRawStrm(file,RawSlc);

    vector<BYTE> sps(0),pps(0),sei(0);
    POINTZ SlcRng(GazrNALU(RawSlc,sps,pps,sei)); //the 3- or 4-byte start codes of the compressed bitstream must be kept
    RawSlc.erase(RawSlc.begin(),RawSlc.begin()+SlcRng.x);

    vector<BYTE> hdr(0); //slice data without its 3- or 4-byte start code
    hdr.assign(RawSlc.begin()+SlcRng.z,RawSlc.begin()+min(SlcRng.y,RawSlc.size()));

    DXVA_PicParams_H264 PicParm{};
    PackPicParm(PicParm,PicWid,PicHit);

    DXVA_Qmatrix_H264 InvQmat{};
    PackInvQmat(InvQmat);

    DXVA_Slice_H264_Short SlcInfo{};
    PackSlcInfo(hdr,SlcInfo);

    CComPtr<ID3D12Resource> pStrmData(nullptr);
    hr=LoadStrmData(pD3DxDev,pStrmData.p,RawSlc);

Note that the 3- or 4-byte start code of the compressed slice data must be kept, otherwise all you get is a black screen. Also, the committed resource for the bitstream must be created in a custom heap (D3D12_HEAP_TYPE_CUSTOM) with the D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE page property and an initial state of D3D12_RESOURCE_STATE_VIDEO_DECODE_READ. A default heap does not work either; again a black screen. I tried all of this. Since it is only a toy experiment, I used H264BSAnalyzer to read off all the required parameters for DXVA_PicParams_H264. Truly, not a single one may be missing, or the screen stays black. The inverse quantization matrix can simply use the default values, all 16s, and both the 4x4 and the 8x8 matrices must be filled, otherwise you get color blocks. Since only one frame is displayed, the slice info is filled in rather perfunctorily. Now pack the data above into the decode input stream structure:

    D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS VDinArg{};
    UINT& FrmNum(VDinArg.NumFrameArguments);
    D3D12_RESOURCE_STATES InitStat{};

    VDinArg.FrameArguments[FrmNum].Type=D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS;
    VDinArg.FrameArguments[FrmNum].Size=sizeof(DXVA_PicParams_H264);
    VDinArg.FrameArguments[FrmNum++].pData=&PicParm;

    VDinArg.FrameArguments[FrmNum].Type=D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX;
    VDinArg.FrameArguments[FrmNum].Size=sizeof(DXVA_Qmatrix_H264);
    VDinArg.FrameArguments[FrmNum++].pData=&InvQmat;

    VDinArg.FrameArguments[FrmNum].Type=D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL;
    VDinArg.FrameArguments[FrmNum].Size=sizeof(DXVA_Slice_H264_Short);
    VDinArg.FrameArguments[FrmNum++].pData=&SlcInfo;

    VDinArg.CompressedBitstream.pBuffer=pStrmData;
    VDinArg.CompressedBitstream.Size=RawSlc.size();
    VDinArg.CompressedBitstream.Offset=0;
    VDinArg.pHeap=pD3DxViDHeap;

Next, create the video decode output texture, again in a custom heap (D3D12_HEAP_TYPE_CUSTOM), this time with the D3D12_CPU_PAGE_PROPERTY_WRITE_BACK page property. Configure the decode output stream structure, and after the actual decode transition the resource states so the subsequent video processing can use them:
    CComPtr<ID3D12Resource> pYUVxTex2D(nullptr);
    InitStat=D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE;
    hr=CreateVideoTexture(pD3DxDev,pYUVxTex2D.p,InitStat,VPinDesc.Format,PicWid,PicHit);

    D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS VDoutArg{};
    VDoutArg.ConversionArguments.Enable=0;
    VDoutArg.pOutputTexture2D=pYUVxTex2D;
    VDoutArg.OutputSubresource=0;

    //decode the frame
    pVdeCmdList->DecodeFrame(pD3DxVidDec,&VDoutArg,&VDinArg);

    TransResStat(pVdeCmdList,VDinArg.CompressedBitstream.pBuffer,
        D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,D3D12_RESOURCE_STATE_COMMON);

    TransResStat(pVdeCmdList,VDoutArg.pOutputTexture2D,
        D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,D3D12_RESOURCE_STATE_COMMON);

Before submitting to the video decode queue and waiting for the decode commands to complete, some extra processing is possible. The two steps below are both optional: you can map the readback heap, and you can verify that the decoded picture is correct, for example by inspecting the saved YUV image with YuvEye. The luma (Y) and chroma (UV) planes of that image live in two separate subresources, so they have to be merged into a single file by hand. (A sketch of reading the decode statistics back appears after the toggled comment block below.)

    //resolve and copy back the decode query data
    ObtainQueryInfo(pCmdList,pVdeCmdList,pQueryHeap,pReadBack,pQueryInfo);

    hr=pVdeCmdList->Close();
    vector<ID3D12CommandList*> CmdListVD{pVdeCmdList};
    pD3DxVdeQue->ExecuteCommandLists(LODWORD(CmdListVD.size()),&CmdListVD[0]);

    //wait for the frame decode to finish
    FlushCmdQue(pD3DxVdeQue,pD3DxFence);

    hr=pVdeAlloc->Reset();
    hr=pVdeCmdList->Reset(pVdeAlloc);

/*/

    LPBYTE pErrTip(nullptr);
    hr=pReadBack->Map(0,nullptr,(LPVOID*)&pErrTip);
    pReadBack->Unmap(0,nullptr);

    file="f:/download/zDecTex.yuv"; //验证解码图像正确性
    MergeResPlane(VDoutArg.pOutputTexture2D,PicWid,PicHit,file);

//*/
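If the query path above is left enabled, the readback buffer holds a D3D12_QUERY_DATA_VIDEO_DECODE_STATISTICS that can be inspected once the decode queue has been flushed. A minimal sketch of interpreting it (again purely optional; a healthy decode simply reports a status of OK):

    D3D12_QUERY_DATA_VIDEO_DECODE_STATISTICS DecStat{};
    LPBYTE pStatData(nullptr);

    hr=pReadBack->Map(0,nullptr,(LPVOID*)&pStatData);
    CopyMemory(&DecStat,pStatData,sizeof(DecStat));
    pReadBack->Unmap(0,nullptr);

    if(DecStat.Status!=D3D12_VIDEO_DECODE_STATUS_OK) //0 means the frame decoded cleanly
    {
        OutputDebugPrintf("decode status %I64u, macroblocks affected %I64u\n",
            DecStat.Status,DecStat.NumMacroblocksAffected);
    }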

Now for the second-to-last step: configure the video process input and output structures and create the video process output texture, again in a custom heap (D3D12_HEAP_TYPE_CUSTOM) with the D3D12_CPU_PAGE_PROPERTY_WRITE_BACK page property and an initial state of D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE. After the actual processing, transition the resource state so the picture can be displayed afterwards. Submit to the video process queue and wait for the processing commands to complete:

    D3D12_VIDEO_PROCESS_INPUT_STREAM_ARGUMENTS VPinArg{};
    VPinArg.Transform.Orientation=D3D12_VIDEO_PROCESS_ORIENTATION_DEFAULT;
    VPinArg.Transform.DestinationRectangle=CD3DX12_RECT(0,0,PicWid,PicHit);
    VPinArg.Transform.SourceRectangle=CD3DX12_RECT(0,0,PicWid,PicHit);
    VPinArg.Flags=D3D12_VIDEO_PROCESS_INPUT_STREAM_FLAG_NONE;
    VPinArg.InputStream[0].pTexture2D=VDoutArg.pOutputTexture2D;
    VPinArg.InputStream[0].Subresource=0;

    TransResStat(pVpsCmdList,VPinArg.InputStream[0].pTexture2D,
        D3D12_RESOURCE_STATE_COMMON,D3D12_RESOURCE_STATE_VIDEO_PROCESS_READ);

    CComPtr<ID3D12Resource> pRGBxTex2D(nullptr);
    InitStat=D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE;
    hr=CreateVideoTexture(pD3DxDev,pRGBxTex2D.p,InitStat,VPoutDesc.Format,PicWid,PicHit);

    D3D12_VIDEO_PROCESS_OUTPUT_STREAM_ARGUMENTS VPoutArg{};
    VPoutArg.TargetRectangle=CD3DX12_RECT(0,0,PicWid,PicHit);
    VPoutArg.OutputStream[0].pTexture2D=pRGBxTex2D;
    VPoutArg.OutputStream[0].Subresource=0;

    //process the frame (color-space conversion NV12 -> RGBA)
    pVpsCmdList->ProcessFrames(pD3DxVidPrcs,&VPoutArg,1,&VPinArg);

    TransResStat(pVpsCmdList,VPoutArg.OutputStream[0].pTexture2D,
        D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE,D3D12_RESOURCE_STATE_COMMON);

    hr=pVpsCmdList->Close();
    vector<ID3D12CommandList*> CmdListVP{pVpsCmdList};
    pD3DxVpsQue->ExecuteCommandLists(LODWORD(CmdListVP.size()),&CmdListVP[0]);

    //wait for the video processing to finish
    FlushCmdQue(pD3DxVpsQue,pD3DxFence);

    hr=pVpsAlloc->Reset();
    hr=pVpsCmdList->Reset(pVpsAlloc);

    CComPtr<ID3D12Resource> pImgTex2D(nullptr);
    InitStat=D3D12_RESOURCE_STATE_COPY_DEST;
    hr=CreateVideoTexture(pD3DxDev,pImgTex2D.p,InitStat,VPoutDesc.Format,PicWid,PicHit);
    hr=DuplicateRGBxPlane(pCmdList,pImgTex2D,VPoutArg.OutputStream[0].pTexture2D,InitStat,PicWid,PicHit);

The RGB plane data is copied into a new resource above to avoid a prompt about an incompatible layer when the picture is presented (assuming GPU-based validation was enabled at the very beginning). Now, at last, the final step: displaying the long-awaited picture:
    D3D12_SHADER_RESOURCE_VIEW_DESC SrvDesc{};
    SrvDesc.Shader4ComponentMapping=D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
    SrvDesc.ViewDimension=D3D12_SRV_DIMENSION_TEXTURE2D;
    SrvDesc.Format=VPoutDesc.Format;
    SrvDesc.Texture2D.MipLevels=1;

    CD3DX12_CPU_DESCRIPTOR_HANDLE hCSUxvDesc(pCSUxvHeap->GetCPUDescriptorHandleForHeapStart());
    pD3DxDev->CreateShaderResourceView(pImgTex2D,&SrvDesc,hCSUxvDesc);

    hr=pCmdList->Close();
    vector<ID3D12CommandList*> CmdList{pCmdList};
    pD3DxCmdQue->ExecuteCommandLists(LODWORD(CmdList.size()),&CmdList[0]);

    //wait for the graphics commands to finish
    FlushCmdQue(pD3DxCmdQue,pD3DxFence);

    D3D12_VIEWPORT ViewPort{};
    AdaptViewPort(ViewPort,width,height);

    UINT BackBufIdx(pSwapChain->GetCurrentBackBufferIndex()); //get the current back-buffer index
    {

        hr=pCmdAlloc->Reset();
        hr=pCmdList->Reset(pCmdAlloc,pPipeStat);

        pCmdList->SetGraphicsRootSignature(pRootSign); //set the root signature
        pCmdList->SetPipelineState(pPipeStat); //set the pipeline state

        vector<ID3D12DescriptorHeap*> DescHeap={pCSUxvHeap};
        pCmdList->SetDescriptorHeaps(LODWORD(DescHeap.size()),&DescHeap[0]);

        CD3DX12_GPU_DESCRIPTOR_HANDLE hGpuDesc(pCSUxvHeap->GetGPUDescriptorHandleForHeapStart());
        pCmdList->SetGraphicsRootDescriptorTable(0,hGpuDesc); //bind the SRV descriptor table

        pCmdList->RSSetViewports(1,&ViewPort);
        pCmdList->RSSetScissorRects(1,&rc);

        CD3DX12_CPU_DESCRIPTOR_HANDLE hRtvDesc(pRtvHeap->GetCPUDescriptorHandleForHeapStart(),BackBufIdx,RtvDescSize);
        CD3DX12_CPU_DESCRIPTOR_HANDLE hDsvDesc(pDsvHeap->GetCPUDescriptorHandleForHeapStart());

        //resource state transition: PRESENT -> RENDER_TARGET
        TransResStat(pCmdList,RescRndr[BackBufIdx],D3D12_RESOURCE_STATE_PRESENT,D3D12_RESOURCE_STATE_RENDER_TARGET);

        //set the render target and depth-stencil views
        pCmdList->OMSetRenderTargets(1,&hRtvDesc,0,&hDsvDesc);

        shared_ptr<float> clr(Color2Float(0x867892),default_delete<float[]>()); //array deleter to match the new[] in Color2Float
        pCmdList->ClearRenderTargetView(hRtvDesc,clr.get(),1,&rc);
        pCmdList->ClearDepthStencilView(hDsvDesc,D3D12_CLEAR_FLAG_DEPTH|D3D12_CLEAR_FLAG_STENCIL,1.f,0,0,nullptr);

        //draw the textured quad
        pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
        pCmdList->IASetVertexBuffers(0,1,&vbv[0]);
        pCmdList->DrawInstanced(VtxNum[0],1,0,0);

        //resource state transition: RENDER_TARGET -> PRESENT
        TransResStat(pCmdList,RescRndr[BackBufIdx],D3D12_RESOURCE_STATE_RENDER_TARGET,D3D12_RESOURCE_STATE_PRESENT);

        hr=pCmdList->Close();
        vector<ID3D12CommandList*> CmdList(1,pCmdList);
        pD3DxCmdQue->ExecuteCommandLists(LODWORD(CmdList.size()),&CmdList[0]);

        pSwapChain->Present(1,0);
        FlushCmdQue(pD3DxCmdQue,pD3DxFence);
    }

All the helper functions used above are collected here:
void BindVtxBufView(ID3D12Resource* pUploadInst,D3D12_VERTEX_BUFFER_VIEW& vbv,
    UINT ElemSize,UINT ByteNum)
{
    vbv.BufferLocation=pUploadInst->GetGPUVirtualAddress();
    vbv.StrideInBytes=ElemSize;
    vbv.SizeInBytes=ByteNum;
}

void AdaptViewPort(D3D12_VIEWPORT& ViewPort,int width,int height)
{
    ViewPort.Width=width*1.f,ViewPort.Height=height*1.f;
    ViewPort.TopLeftX=ViewPort.TopLeftY=0.f;
    ViewPort.MaxDepth=D3D12_MAX_DEPTH;
    ViewPort.MinDepth=D3D12_MIN_DEPTH;
}

float* Color2Float(DWORD val)
{
    vector<BYTE> rgb={LOBYTE(HIWORD(val)),HIBYTE(LOWORD(val)),LOBYTE(LOWORD(val))};
    float* clr(new float[4]);

    for(int idx=0;idx<3;++idx)
    {
        clr[idx]=rgb[idx]/255.f;
    }
    clr[3]=1.f;

    return(clr);
}

HRESULT FlushCmdQue(ID3D12CommandQueue* pD3DxCmdQue,ID3D12Fence* pD3DxFence)
{
    static UINT64 FenWait(0);
    ++FenWait;

    HRESULT hr(pD3DxCmdQue->Signal(pD3DxFence,FenWait));
    if(pD3DxFence->GetCompletedValue()<FenWait)
    {
        HANDLE hWaitCmd(CreateEventEx(nullptr,"WaitCmd",0,EVENT_ALL_ACCESS));
        hr=pD3DxFence->SetEventOnCompletion(FenWait,hWaitCmd);

        WaitForSingleObject(hWaitCmd,INFINITE);
        CloseHandle(hWaitCmd);
    }

    return(hr);
}

void TransResStat(ID3D12GraphicsCommandList6* pGrsCmdList,ID3D12Resource* pTransInst,
    D3D12_RESOURCE_STATES PrevStat,D3D12_RESOURCE_STATES NowStat)
{
    if(PrevStat!=NowStat)
    {
        D3D12_RESOURCE_BARRIER ResBar{};
        ResBar.Transition.Subresource=D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        ResBar.Type=D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
        ResBar.Transition.pResource=pTransInst;
        ResBar.Transition.StateBefore=PrevStat;
        ResBar.Transition.StateAfter=NowStat;
        pGrsCmdList->ResourceBarrier(1,&ResBar);
    }
}

void TransResStat(ID3D12VideoProcessCommandList2* pVpsCmdList,ID3D12Resource* pTransInst,
    D3D12_RESOURCE_STATES PrevStat,D3D12_RESOURCE_STATES NowStat)
{
    if(PrevStat!=NowStat)
    {
        D3D12_RESOURCE_BARRIER ResBar{};
        ResBar.Transition.Subresource=D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        ResBar.Type=D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
        ResBar.Transition.pResource=pTransInst;
        ResBar.Transition.StateBefore=PrevStat;
        ResBar.Transition.StateAfter=NowStat;
        pVpsCmdList->ResourceBarrier(1,&ResBar);
    }
}

void TransResStat(ID3D12VideoDecodeCommandList2* pVdeCmdList,ID3D12Resource* pTransInst,
    D3D12_RESOURCE_STATES PrevStat,D3D12_RESOURCE_STATES NowStat)
{
    if(PrevStat!=NowStat)
    {
        D3D12_RESOURCE_BARRIER ResBar{};
        ResBar.Transition.Subresource=D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        ResBar.Type=D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
        ResBar.Transition.pResource=pTransInst;
        ResBar.Transition.StateBefore=PrevStat;
        ResBar.Transition.StateAfter=NowStat;
        pVdeCmdList->ResourceBarrier(1,&ResBar);
    }
}

HRESULT LoadStrmData(ID3D12Device8* pD3DxDev,ID3D12Resource*& pStrmData,vector<BYTE>& RawSlc)
{
    D3D12_RESOURCE_DESC ResDesc=CD3DX12_RESOURCE_DESC::Buffer(AlignBufSize(RawSlc.size(),128));
    D3D12_HEAP_PROPERTIES HeapProp
    {
        CD3DX12_HEAP_PROPERTIES(D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,D3D12_MEMORY_POOL_L0)
    };
    HRESULT hr(pD3DxDev->CreateCommittedResource(&HeapProp,D3D12_HEAP_FLAG_NONE,&ResDesc,
        D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,nullptr,IID_PPV_ARGS(&pStrmData)));

    LPBYTE pMapData(nullptr);
    hr=pStrmData->Map(0,nullptr,(LPVOID*)&pMapData);
    CopyMemory(pMapData,RawSlc.data(),RawSlc.size());
    pStrmData->Unmap(0,nullptr);

    return(hr);
}

HRESULT CreateVideoTexture(ID3D12Device8* pD3DxDev,ID3D12Resource*& pTexBuf,D3D12_RESOURCE_STATES InitStat,DXGI_FORMAT ResFmt,
    UINT ImgWidth,UINT ImgHigh)
{
    D3D12_RESOURCE_DESC TexDesc{};
    TexDesc.Width=ImgWidth,TexDesc.Height=ImgHigh;
    TexDesc.Dimension=D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    TexDesc.Layout=D3D12_TEXTURE_LAYOUT_UNKNOWN;
    TexDesc.Flags=D3D12_RESOURCE_FLAG_NONE;
    TexDesc.SampleDesc.Quality=0;
    TexDesc.SampleDesc.Count=1;
    TexDesc.DepthOrArraySize=1;
    TexDesc.Format=ResFmt;
    TexDesc.MipLevels=1;
    TexDesc.Alignment=0;

    D3D12_HEAP_PROPERTIES HeapProp
    {
        CD3DX12_HEAP_PROPERTIES(D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,D3D12_MEMORY_POOL_L0)
    };
    HRESULT hr(pD3DxDev->CreateCommittedResource(&HeapProp,D3D12_HEAP_FLAG_NONE,&TexDesc,
        InitStat,nullptr,IID_PPV_ARGS(&pTexBuf)));

    return(hr);
}

HRESULT QueryDecodeStatus(ID3D12Device8* pD3DxDev,ID3D12QueryHeap*& pQueryHeap,
    ID3D12Resource*& pReadBack,ID3D12Resource*& pQueryInfo,UINT ElemSize,UINT ElemNum)
{
    UINT BufLen(ElemSize*ElemNum);
    HRESULT hr(E_INVALIDARG);

    D3D12_QUERY_HEAP_DESC QYeapDesc{};
    QYeapDesc.NodeMask=0; QYeapDesc.Count=1;
    QYeapDesc.Type=D3D12_QUERY_HEAP_TYPE_VIDEO_DECODE_STATISTICS;
    hr=pD3DxDev->CreateQueryHeap(&QYeapDesc,IID_PPV_ARGS(&pQueryHeap));

    D3D12_HEAP_PROPERTIES HeapProp=CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK);
    D3D12_RESOURCE_DESC ResDesc=CD3DX12_RESOURCE_DESC::Buffer(BufLen);

    hr=pD3DxDev->CreateCommittedResource(&HeapProp,D3D12_HEAP_FLAG_NONE,&ResDesc,
        D3D12_RESOURCE_STATE_COPY_DEST,nullptr,IID_PPV_ARGS(&pReadBack));

    HeapProp=CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
    hr=pD3DxDev->CreateCommittedResource(&HeapProp,D3D12_HEAP_FLAG_NONE,&ResDesc,
        D3D12_RESOURCE_STATE_COMMON,nullptr,IID_PPV_ARGS(&pQueryInfo));

    return(hr);
}

void ObtainQueryInfo(ID3D12GraphicsCommandList6* pCmdList,ID3D12VideoDecodeCommandList2* pVdeCmdList,
    ID3D12QueryHeap* pQueryHeap,ID3D12Resource* pReadBack,ID3D12Resource* pQueryInfo)
{
    D3D12_QUERY_TYPE QueryType(D3D12_QUERY_TYPE_VIDEO_DECODE_STATISTICS);
    pVdeCmdList->EndQuery(pQueryHeap,QueryType,0);

    //resolve the query data into the default-heap buffer
    pVdeCmdList->ResolveQueryData(pQueryHeap,QueryType,0,1,pQueryInfo,0);

    //resource state transition: COMMON -> COPY_SOURCE
    TransResStat(pCmdList,pQueryInfo,D3D12_RESOURCE_STATE_COMMON,D3D12_RESOURCE_STATE_COPY_SOURCE);

    //copy into the readback heap
    pCmdList->CopyResource(pReadBack,pQueryInfo);

    //resource state transition: COPY_SOURCE -> COMMON
    TransResStat(pCmdList,pQueryInfo,D3D12_RESOURCE_STATE_COPY_SOURCE,D3D12_RESOURCE_STATE_COMMON);
}

//scan an Annex-B buffer, copy out the SPS/PPS/SEI payloads, and return the location of the slice NAL:
//x = offset of its start code, y = end offset, z = start-code length (3 or 4)
POINTZ GazrNALU(vector<BYTE>& slice,vector<BYTE>& sps,vector<BYTE>& pps,vector<BYTE>& sei)
{
    vector<POINTZ> seg(0);
    for(UINT idx=0;idx+4<=slice.size();++idx)
    {
        if((0==slice[idx]&&0==slice[idx+1]&&0==slice[idx+2]&&1==slice[idx+3])||
            (0==slice[idx]&&0==slice[idx+1]&&1==slice[idx+2]))
        {
            //the lowest set bit of the little-endian dword tells whether the start code is 3 or 4 bytes long
            int val(*(int*)(slice.data()+idx)&0xFFFFFFFF),bit(val&-val);
            int pos(1+(int)Log2(bit)/8); //start-code length
            int nut(slice[idx+pos]&0x1F); //nal_unit_type

            seg.push_back({(int)idx,nut,pos});
            idx+=pos;
        }
    }
    seg.push_back({(int)slice.size(),0,0}); //sentinel marking the end of the buffer

    POINTZ SlcRng{};
    for(UINT idx=0;idx<seg.size()-1;++idx)
    {
        switch(seg[idx].y)
        {
        case 6: //SEI
            sei.assign(slice.begin()+seg[idx].x+seg[idx].z,slice.begin()+seg[idx+1].x);
            break;

        case 7: //SPS
            sps.assign(slice.begin()+seg[idx].x+seg[idx].z,slice.begin()+seg[idx+1].x);
            break;

        case 8: //PPS
            pps.assign(slice.begin()+seg[idx].x+seg[idx].z,slice.begin()+seg[idx+1].x);
            break;

        case 1: //non-IDR (P/B) slice
        case 5: //IDR slice
            SlcRng={seg[idx].x,seg[idx+1].x,seg[idx].z};
            break;
        }
    }

    return(SlcRng);
}

void PackPicParm(DXVA_PicParams_H264& PicParm,UINT PicWid,UINT PicHit)
{
    //all values below were transcribed from this stream's SPS/PPS (read with H264BSAnalyzer); 1920/16-1=119, 1088/16-1=67
    PicParm.wFrameWidthInMbsMinus1=PicWid/16-1;
    PicParm.wFrameHeightInMbsMinus1=PicHit/16-1;

    PicParm.num_ref_frames=4;
    PicParm.chroma_format_idc=1;
    PicParm.weighted_pred_flag=1;
    PicParm.frame_mbs_only_flag=1;
    PicParm.MinLumaBipredSize8x8Flag=1;
    PicParm.transform_8x8_mode_flag=1;
    PicParm.weighted_bipred_idc=2;
    PicParm.MbsConsecutiveFlag=1;

    PicParm.IntraPicFlag=1;
    PicParm.ContinuationFlag=1;
    PicParm.pic_init_qp_minus26=-5;
    PicParm.StatusReportFeedbackNumber=1;
    PicParm.log2_max_pic_order_cnt_lsb_minus4=2;
    PicParm.deblocking_filter_control_present_flag=1;
    PicParm.num_ref_idx_l0_active_minus1=1;
    PicParm.direct_8x8_inference_flag=1;
    PicParm.entropy_coding_mode_flag=1;
    PicParm.Reserved16Bits=3;
}

void PackInvQmat(DXVA_Qmatrix_H264& InvQmat)
{
    for(UINT row=0;row<6;++row)
    {
        for(UINT col=0;col<16;++col)
        {
            InvQmat.bScalingLists4x4[row][col]=16;
        }
    }

    for(UINT row=0;row<2;++row)
    {
        for(UINT col=0;col<64;++col)
        {
            InvQmat.bScalingLists8x8[row][col]=16;
        }
    }
}

void PackSlcInfo(vector<BYTE>& hdr,DXVA_Slice_H264_Short& SlcInfo)
{
    SlcInfo.SliceBytesInBuffer=LODWORD(hdr.size());
    SlcInfo.BSNALunitDataLocation=0;
    SlcInfo.wBadSliceChopping=0;
}

void MergeResPlane(ID3D12Resource* pYUVxTex2D,UINT PicWid,UINT PicHit,string& file)
{   //subresource 0 = Y plane (PicWid*PicHit bytes), subresource 1 = interleaved UV plane (half that size)
    UINT BufSiz(PicWid*PicHit);
    vector<BYTE> MapData(BufSiz);
    HRESULT hr(E_INVALIDARG);

    shared_ptr<FILE> fp(fopen(file.c_str(),"wb"),bind(fclose,_1));
    for(UINT idx=0;idx<2;++idx)
    {
        hr=pYUVxTex2D->ReadFromSubresource(&MapData[0],PicWid,BufSiz,idx,nullptr);
        fwrite(MapData.data(),1,BufSiz,fp.get());
        MapData.resize(BufSiz/=2);
    }
}

HRESULT DuplicateRGBxPlane(ID3D12GraphicsCommandList6* pCmdList,ID3D12Resource* pImgTex2D,
    ID3D12Resource* pRgbTex2D,D3D12_RESOURCE_STATES InitStat,UINT PicWid,UINT PicHit)
{
    UINT BufSiz(PicWid*PicHit*sizeof(UINT));
    vector<BYTE> MapData(BufSiz);
    HRESULT hr(E_INVALIDARG);

    hr=pRgbTex2D->ReadFromSubresource(MapData.data(),PicWid*sizeof(UINT),BufSiz,0,nullptr);
    hr=pImgTex2D->WriteToSubresource(0,nullptr,MapData.data(),PicWid*sizeof(UINT),BufSiz);

    //resource state transition: COPY_DEST -> ALL_SHADER_RESOURCE
    TransResStat(pCmdList,pImgTex2D,InitStat,D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE);

    return(hr);
}

inline UINT64 AlignBufSize(UINT64 BufSize,const UINT64 BulkSize)
{
    return((BufSize+BulkSize-1)&~(BulkSize-1));
}

void ReadRawStrm(string& file,vector<BYTE>& RawData)
{
    shared_ptr<FILE> fp(fopen(file.c_str(),"rb"),bind(fclose,_1));
    RawData.resize(_filelength(_fileno(fp.get())));
    fread(&RawData[0],1,RawData.size(),fp.get());
}

template<typename...TYPE>
void OutputDebugPrintf(const string& FmtStr,TYPE&&...ArgsList)
{
    size_t dalen(1+snprintf(nullptr,0,FmtStr.c_str(),ArgsList...));
    string PrnBuf(dalen,'\0');

    snprintf(&PrnBuf[0],dalen,FmtStr.c_str(),ArgsList...);
    OutputDebugString(PrnBuf.c_str());
}

template<typename...TYPE>
void OutputDebugPrintf(const wstring& FmtStr,TYPE&&...ArgsList)
{
    size_t dalen(1+_scwprintf(FmtStr.c_str(),ArgsList...)); //unlike snprintf, swprintf cannot be used for sizing
    wstring PrnBuf(dalen,L'\0');

    swprintf(&PrnBuf[0],dalen,FmtStr.c_str(),ArgsList...);
    OutputDebugStringW(PrnBuf.c_str());
}

There is also a template class used to upload the vertex buffer data:
template<typename TYPE>
class UploadBuffer
{
public:
    UploadBuffer(ID3D12Device8* pD3DxDev,UINT ElemNum,BOOL bConstBuf):m_bConstBuf(bConstBuf)
    {
        m_ElemSize=CalcConstBufByteLen(sizeof(TYPE));
        m_ElemSize=(!m_bConstBuf)?sizeof(TYPE):m_ElemSize;
        
        CD3DX12_HEAP_PROPERTIES HeapProp=CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
        CD3DX12_RESOURCE_DESC ResDesc=CD3DX12_RESOURCE_DESC::Buffer(m_ElemSize*ElemNum);

        HRESULT hr(pD3DxDev->CreateCommittedResource(&HeapProp,D3D12_HEAP_FLAG_NONE,&ResDesc,
            D3D12_RESOURCE_STATE_GENERIC_READ,nullptr,IID_PPV_ARGS(&m_pUploadInst)));

        CD3DX12_RANGE MapRng(0,0);
        hr=m_pUploadInst->Map(0,&MapRng,(LPVOID*)&m_pMapData);
    }

    UploadBuffer(const UploadBuffer&)=delete;
    UploadBuffer& operator=(const UploadBuffer&)=delete;

    UINT CalcConstBufByteLen(UINT ByteNum)
    {
        return((ByteNum+255)&~255);
    }

    virtual ~UploadBuffer()
    {
        if(m_pUploadInst!=nullptr) m_pUploadInst->Unmap(0,nullptr);
        m_pMapData=nullptr;
    }

    ID3D12Resource* RetUploadInst() const
    {
        return(m_pUploadInst);
    }

    LPBYTE RetMapPin() const
    {
        return(m_pMapData);
    }

    void CopyData(UINT ElemIdx,const TYPE& ElemData)
    {
        CopyMemory(m_pMapData+ElemIdx*m_ElemSize,&ElemData,m_ElemSize);
    }

protected:
    CComPtr<ID3D12Resource> m_pUploadInst=nullptr;
    LPBYTE m_pMapData=nullptr;
    BOOL m_bConstBuf=0;
    UINT m_ElemSize=0;
};

The related data structures:
typedef struct _D3DVertex
{
    XMFLOAT3 pos;
    XMFLOAT3 nml;
    XMFLOAT2 tex;
}D3DVertex,* LPD3DVertex;

typedef struct _POINTZ
{
    int x,y,z;
}POINTZ,* LPPOINTZ;

And the shadervp.hlsl file that goes with it:

//vertex shader input
struct VertexIn
{
    float3 PosL: POSITION;
    float3 NrmL: NORMAL;
    float2 TexC: TEXCOORD;
};

//vertex shader output / pixel shader input
struct VertexOut
{
    float4 PosH: SV_POSITION;
    float3 NrmH: NORMAL;
    float2 TexC: TEXCOORD;
};

Texture2D g_Tex0: register(t0);
SamplerState g_TSS0: register(s0);

//vertex shader entry point
VertexOut VSMain(VertexIn vin)
{
    VertexOut vout;
    
    vout.PosH=float4(vin.PosL,1.0);
    vout.NrmH=vin.NrmL;
    vout.TexC=vin.TexC;

    return vout;
}

//pixel shader entry point
float4 PSMain(VertexOut vout): SV_Target
{
    float3 color=g_Tex0.Sample(g_TSS0,vout.TexC).rgb;
    float4 outcolor=float4(color,1.0);

    return outcolor;
}

Take the latest Evangelion theatrical film as an example (roughly 224306 frames). Use any method and any tool you like to seek to the middle of the film, save one complete IDR frame (including SPS, PPS and slice data) to disk, put the correct path into file, and run the code. And there it is: the scene of Captain Misato Katsuragi directing the battleship Wunder's attack on NERV headquarters. Since I only half understand D3D12 and H.264, I took plenty of detours; but as they say, the boat straightens itself when it reaches the bridge, and after nearly a month of tinkering and no small amount of frustration, this is the little I have to show for it. This post is already rather long, so the required header and library files are attached in a separate post.

//References:

.yang/gst-plugins-bad/-/tree/d3d12/sys/d3d12

D3D12HelloVADecode
