NVCodec 解码案例

下面是nvcodec解码案例,可以参考:

#include "nvcuvid.h"
#include <cuda_runtime.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "utils/demuxer.h"

#define START_CODE_ANNEXB 0x00000001
#define START_CODE_SIZE 4

// A buffer/size/timestamp triple describing one chunk of bitstream data.
// NOTE(review): this type is not referenced anywhere in the visible part of
// the file; field meanings below are inferred from the names - confirm.
typedef struct packet_t {
    // presumably the start of the payload bytes
    uint8_t* data;
    // presumably the payload length in bytes
    int      size;
    // presumably the presentation timestamp of the payload
    int64_t  pts;
} packet;

// Shared state of the decode test: CUDA/NVDEC handles, the stream properties
// reported by the parser, and the input/output files. main() registers a
// pointer to this struct as the parser's pUserData, so every parser callback
// receives it back as its `opaque` argument.
typedef struct nvcuvid_test_ctx_t {
    // Decoder handle; created in handle_video_sequence(), destroyed in main().
    CUvideodecoder    cudecoder;
    // Parser handle and the parameters it is created with (filled in main()).
    CUvideoparser     cuparser;
    CUVIDPARSERPARAMS cuparseinfo;
    // caps8 is the structure passed to cuvidGetDecoderCaps() in
    // handle_video_sequence().
    // NOTE(review): caps10/caps12 are not referenced in the visible code.
    CUVIDDECODECAPS   caps8;
    CUVIDDECODECAPS   caps10;
    CUVIDDECODECAPS   caps12;
    // CUDA context and the video context lock, both created in cu_init().
    CUcontext         cu_ctx;
    CUvideoctxlock    cu_ctx_lock;

    // Demuxer used by demux_ffmpeg(); allocated with `new` in main().
    FFmpegDemuxer* demuxer;

    // Number of packets handed to cuvidParseVideoData() so far (see main()).
    uint64_t    frame_count;
    // Input path (argv[1]); the output path is this name plus ".yuv".
    const char* in_file_name;
    // 1 MiB packet buffer allocated lazily by demux()/demux_ffmpeg().
    void*       host_data;
    // 1024-byte scratch buffer allocated by check_format(); demux() uses it to
    // carry the start code it has just read over to the next packet.
    void*       tmp_data;
    // Number of valid bytes currently held in tmp_data.
    int         tmp_data_size;

    // Stream properties copied from CUVIDEOFORMAT in handle_video_sequence().
    int                      coded_width;
    int                      coded_height;
    // Output size; set equal to the coded size in handle_video_sequence().
    int                      out_width;
    int                      out_height;
    cudaVideoCodec           codec_type;
    int                      bit_depth_minus8;  // 0-8bit, 2-10bit, 4-12bit
    // Copied from CUVIDEOFORMAT::min_num_decode_surfaces.
    int                      decode_surface;
    unsigned char            progressive_sequence;
    unsigned int             bit_rate;
    // Requested decoder output surface format (main() sets NV12).
    cudaVideoSurfaceFormat   out_pix_fmt;
    // Chroma format of the coded stream.
    cudaVideoChromaFormat    in_pix_fmt;
    // NOTE(review): not referenced in the visible code.
    cudaVideoDeinterlaceMode deinter_lace_mode;

    // Extra cropping added to the stream's display area; only read on the
    // decoder-reconfigure path of handle_video_sequence().
    struct {
        int left;
        int top;
        int right;
        int bottom;
    } crop;

    // NOTE(review): not referenced in the visible code.
    struct {
        int width;
        int height;
    } resize;

    // Frame rate as numerator/denominator, copied from CUVIDEOFORMAT.
    struct {
        unsigned int num;
        unsigned int den;
    } framerate;
    // NOTE(review): eos, nb_surfaces and need_resize are not referenced in
    // the visible code.
    int   eos;
    int   nb_surfaces;
    // Result code of the most recent CUDA/NVDEC call made by a callback;
    // callbacks report success to the parser when it is 0.
    int   internal_error;
    int   need_resize;
    // Set to 1 by the demux functions once the end-of-stream packet has been
    // produced; terminates the decode loop in main().
    int   decoder_flushing;
    // Size of the input file as returned by ftell() in open_file().
    int   in_file_size;
    // Running total of payload bytes produced by the demux functions.
    int   read_size;
    // Input bitstream file and raw output file, both opened in open_file().
    FILE* in_file;
    FILE* out_file;
} nvcuvid_test_ctx;

// Prints every field of a decoder-capability structure to stdout, one
// "name: value" line per field.
//
// caps: structure to dump (as passed to cuvidGetDecoderCaps()); must not be
//       NULL.
static void print_caps(CUVIDDECODECAPS* caps) {
    printf("eCodecType: %d\n", caps->eCodecType);
    printf("eChromaFormat: %d\n", caps->eChromaFormat);
    printf("nBitDepthMinus8: %d\n", caps->nBitDepthMinus8);
    printf("bIsSupported: %d\n", caps->bIsSupported);
    printf("nNumNVDECs: %d\n", caps->nNumNVDECs);
    printf("nMaxWidth: %d\n", caps->nMaxWidth);
    printf("nMaxHeight: %d\n", caps->nMaxHeight);
    // Printed once only; the original emitted this line twice.
    printf("nMaxMBCount: %d\n", caps->nMaxMBCount);
    printf("nMinWidth: %d\n", caps->nMinWidth);
    printf("nMinHeight: %d\n", caps->nMinHeight);
}

static int handle_video_sequence(void* opaque, CUVIDEOFORMAT* format) {
    nvcuvid_test_ctx*           ctx    = NULL;
    CUVIDDECODECAPS*            caps   = NULL;
    CUVIDDECODECREATEINFO       cuinfo = {0};
    CUVIDRECONFIGUREDECODERINFO cf     = {0};

    ctx                 = (nvcuvid_test_ctx*)opaque;
    ctx->internal_error = 0;

    // getcaps
    caps                = &ctx->caps8;
    caps->eCodecType    = format->codec;
    caps->eChromaFormat = format->chroma_format;
    caps->nBitDepthMinus8 =
        format->bit_depth_luma_minus8;  // 0-8bit, 2-10bit, 4-12bit
    // cuCtxPushCurrent(ctx->cu_ctx);
    ctx->internal_error = cuvidGetDecoderCaps(caps);
    if (ctx->internal_error != 0) {
        printf("cuvidGetDecoderCaps failed,ret=%d\n", ctx->internal_error);
        goto err;
    }
    // cuCtxPopCurrent(NULL);

    print_caps(caps);
    if (!caps->bIsSupported) {
        printf("Codec not supported on this GPU\n");
        goto err;
    }

    // check if need to reinit decoder
    ctx->bit_rate             = format->bitrate;
    ctx->codec_type           = format->codec;
    ctx->coded_width          = format->coded_width;
    ctx->coded_height         = format->coded_height;
    ctx->in_pix_fmt           = format->chroma_format;
    ctx->framerate.num        = format->frame_rate.numerator;
    ctx->framerate.den        = format->frame_rate.denominator;
    ctx->bit_depth_minus8     = format->bit_depth_luma_minus8;
    ctx->progressive_sequence = format->progressive_sequence;
    ctx->decode_surface       = format->min_num_decode_surfaces;
    printf(
        "bitrate:%d,codec:%d,coded_width:%d,coded_height:%d,chroma_format:%d,"
        "frame_rate:%d/%d,bit_depth_luma_minus8:%d,progressive_sequence:%d,"
        "min_num_decode_surfaces:%d\n",
        ctx->bit_rate, ctx->codec_type, ctx->coded_width, ctx->coded_height,
        ctx->in_pix_fmt, ctx->framerate.num, ctx->framerate.den,
        ctx->bit_depth_minus8, ctx->progressive_sequence, ctx->decode_surface);

    // do some checks
    // a.check max resolution supported
    // b.check max mb count supported

    // if the decoder is already created, reconfigure it
    if (ctx->cudecoder) {
        printf("Re-initializing decoder\n");
        cf.ulWidth        = ctx->coded_width;
        cf.ulHeight       = ctx->coded_height;
        cf.ulTargetWidth  = ctx->out_width;
        cf.ulTargetHeight = ctx->out_height;

        cf.display_area.left   = format->display_area.left + ctx->crop.left;
        cf.display_area.top    = format->display_area.top + ctx->crop.top;
        cf.display_area.right  = format->display_area.right - ctx->crop.right;
        cf.display_area.bottom = format->display_area.bottom - ctx->crop.bottom;
        cf.ulNumDecodeSurfaces = ctx->decode_surface;

        // reconfigure decoder
        ctx->internal_error = cuvidReconfigureDecoder(ctx->cudecoder, &cf);
        if (ctx->internal_error != 0) {
            printf("cuvidReconfigureDecoder failed\n");
            goto err;
        }
        printf("Re-initialized decoder\n");
    }

    // for the first time, create decoder
    memset(&cuinfo, 0, sizeof(CUVIDDECODECREATEINFO));
    cuinfo.CodecType      = (cudaVideoCodec)ctx->codec_type;
    cuinfo.ChromaFormat   = ctx->in_pix_fmt;
    cuinfo.OutputFormat   = ctx->out_pix_fmt;
    cuinfo.bitDepthMinus8 = ctx->bit_depth_minus8;
    if (ctx->progressive_sequence)
        cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
    cuinfo.ulNumOutputSurfaces = 2;
    cuinfo.ulCreationFlags     = cudaVideoCreate_PreferCUVID;
    cuinfo.ulNumDecodeSurfaces = ctx->decode_surface;
    // cuinfo.vidLock             = ctx->cu_ctx_lock;
    cuinfo.ulWidth  = ctx->coded_width;
    cuinfo.ulHeight = ctx->coded_height;
    printf(
        "codec:%d,chroma_format:%d,width:%d,height:%d,output_format:%d,"
        "num_decode_surfaces:%d,num_output_surfaces:%d,creation_flags:%d,"
        "vidLock:%p,bitDepthMinus8:%d\n",
        cuinfo.CodecType, cuinfo.ChromaFormat, cuinfo.ulWidth, cuinfo.ulHeight,
        cuinfo.OutputFormat, cuinfo.ulNumDecodeSurfaces,
        cuinfo.ulNumOutputSurfaces, cuinfo.ulCreationFlags, cuinfo.vidLock,
        cuinfo.bitDepthMinus8);

    printf("createdecoder,ulNumDecodeSurfaces:%d\n",
           cuinfo.ulNumDecodeSurfaces);

    // apply cropping
    // cuinfo.display_area.left   = format->display_area.left + ctx->crop.left;
    // cuinfo.display_area.top    = format->display_area.top + ctx->crop.top;
    // cuinfo.display_area.right  = format->display_area.right -
    // ctx->crop.right; cuinfo.display_area.bottom = format->display_area.bottom
    // - ctx->crop.bottom;
    // printf("display_area:left:%d,top:%d,right:%d,bottom:%d\n",
    //        cuinfo.display_area.left, cuinfo.display_area.top,
    //        cuinfo.display_area.right, cuinfo.display_area.bottom);

    ctx->out_width = ctx->coded_width;    // cuinfo.display_area.right -
                                          // cuinfo.display_area.left;
    ctx->out_height = ctx->coded_height;  // cuinfo.display_area.bottom -
                                          // cuinfo.display_area.hecoded_height
    printf("out_width:%d,out_height:%d\n", ctx->out_width, ctx->out_height);

    // target width/height need to be multiples of two align to 2
    cuinfo.ulTargetWidth  = ctx->out_width;
    cuinfo.ulTargetHeight = ctx->out_height;
    // // aspect ratio conversion, 1:1, depends on scaled resolution
    // cuinfo.target_rect.left   = 0;
    // cuinfo.target_rect.top    = 0;
    // cuinfo.target_rect.right  = cuinfo.ulTargetWidth;
    // cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
    // printf("target_rect:left:%d,top:%d,right:%d,bottom:%d\n",
    //        cuinfo.target_rect.left, cuinfo.target_rect.top,
    //        cuinfo.target_rect.right, cuinfo.target_rect.bottom);
    // max resolution
    cuinfo.ulMaxWidth  = cuinfo.ulTargetWidth;
    cuinfo.ulMaxHeight = cuinfo.ulTargetHeight;
    printf("max resolution:width:%d,height:%d\n", cuinfo.ulMaxWidth,
           cuinfo.ulMaxHeight);

    // cuCtxPushCurrent(ctx->cu_ctx);
    ctx->internal_error = cuvidCreateDecoder(&ctx->cudecoder, &cuinfo);
    if (ctx->internal_error < 0) {
        printf("cuvidCreateDecoder failed\n");
        goto err;
    }
    // cuCtxPopCurrent(NULL);
    printf("cuvidCreateDecoder success, ret:%d\n", ctx->internal_error);

err:

    return (0 == ctx->internal_error);
}

static int handle_picture_display(void* opaque, CUVIDPARSERDISPINFO* dispinfo) {
    int                  ret       = 0;
    int                  bpp       = 1;
    void*                dst_frame = NULL;
    int                  dst_size  = 0;
    nvcuvid_test_ctx*    ctx       = NULL;
    CUVIDGETDECODESTATUS s;
    printf("<<<handle_picture_display\n");

    ctx                 = (nvcuvid_test_ctx*)opaque;
    ctx->internal_error = 0;

    if (ctx->bit_depth_minus8 > 0) {
        bpp = 2;
    }

    if (!dispinfo) {
        printf("dispinfo is null\n");
        return 0;
    }

    CUVIDPROCPARAMS map_info   = {0};
    map_info.progressive_frame = dispinfo->progressive_frame;
    map_info.second_field      = dispinfo->repeat_first_field + 1;
    map_info.top_field_first   = dispinfo->top_field_first;
    map_info.unpaired_field    = dispinfo->repeat_first_field < 0;
    map_info.output_stream     = NULL;

    CUdeviceptr  map_frame = 0;
    unsigned int map_pitch = 0;  // for 8 bit pitch=1, for 10/12 bit pitch=2
    ret = cuvidMapVideoFrame(ctx->cudecoder, dispinfo->picture_index,
                             &map_frame, &map_pitch, &map_info);
    if (ret != 0) {
        printf("cuvidMapVideoFrame failed\n");
        ctx->internal_error = ret;
        goto err;
    }

    memset(&s, 0, sizeof(s));
    ret = cuvidGetDecodeStatus(ctx->cudecoder, dispinfo->picture_index, &s);
    if (ret != 0) {
        printf("cuvidGetDecodeStatus failed\n");
        ctx->internal_error = ret;
        goto err;
    }

    if (s.decodeStatus == cuvidDecodeStatus_Error ||
        s.decodeStatus == cuvidDecodeStatus_Error_Concealed) {
        printf("Decode Error occurred for picture.\n");
        exit(1);
    }
    printf("cuvidMapVideoFrame success,pitch:%u\n", map_pitch);

    dst_frame = NULL;
    // only for nv12
    dst_size  = ctx->out_width * ctx->out_height * bpp * 1.5;
    dst_frame = (uint8_t*)malloc(dst_size);
    // cpy to host
    ret = cudaMemcpy(dst_frame, (void*)map_frame, dst_size,
                     cudaMemcpyDeviceToHost);
    if (ret != 0) {
        printf("cuMemcpyDtoH failed\n");
        ctx->internal_error = ret;
        goto err;
    }
    fwrite(dst_frame, 1, dst_size, ctx->out_file);
    ctx->internal_error = cuvidUnmapVideoFrame(ctx->cudecoder, map_frame);
    if (ctx->internal_error != 0) {
        printf("cuvidUnmapVideoFrame failed\n");
        goto err;
    }
    printf("cuvidUnmapVideoFrame success\n");

err:

    return (0 == ctx->internal_error);

    // Parser decode callback: submits one picture to the hardware decoder.
    //
    // The CUDA context is pushed for the duration of the call and is popped
    // again on every path, including failure.
    //
    // opaque:  the nvcuvid_test_ctx registered as the parser's user data.
    // picpara: picture parameters supplied by the parser.
    //
    // Returns 1 on success, 0 on failure (ctx->internal_error holds the
    // failing result code when ctx is non-NULL).
    static int handle_picture_decode(void* opaque, CUVIDPICPARAMS* picpara) {
        nvcuvid_test_ctx* ctx = (nvcuvid_test_ctx*)opaque;
        if (ctx == NULL) {
            printf("ctx is null\n");
            return 0;
        }

        ctx->internal_error = cuCtxPushCurrent(ctx->cu_ctx);
        if (ctx->internal_error != 0) {
            printf("cuCtxPushCurrent failed\n");
            return 0;
        }

        ctx->internal_error = cuvidDecodePicture(ctx->cudecoder, picpara);
        // Pop before looking at the result so the context stack stays
        // balanced even when decoding fails.
        cuCtxPopCurrent(NULL);

        // CUresult error codes are positive; "< 0" would never trigger.
        if (ctx->internal_error != 0) {
            printf("cuvidDecodePicture failed\n");
            return 0;
        }

        printf("cuvidDecodePicture success,ret=%d.\n", ctx->internal_error);
        return 1;
    }

    // Tests whether the bytes ending at data[pos] form an Annex B start code.
    //
    // ctx:  unused.
    // data: buffer to inspect.
    // pos:  index of the last byte of the candidate start code.
    //
    // Returns 2 for a four-byte start code (00 00 00 01), 1 for a three-byte
    // start code (00 00 01), and 0 otherwise - including when `data` is NULL
    // or `pos` is too small for the look-behind to stay inside the buffer.
    static int start_code_is_annexb(nvcuvid_test_ctx * ctx, uint8_t * data,
                                    int pos) {
        (void)ctx;
        // A start code needs at least three bytes ending at pos.
        if (data == NULL || pos < 2) {
            return 0;
        }
        if (data[pos] != 1 || data[pos - 1] != 0 || data[pos - 2] != 0) {
            return 0;
        }
        // Prefer the four-byte form when a fourth zero byte precedes it; only
        // look one byte further back when that byte exists.
        if (pos >= 3 && data[pos - 3] == 0) {
            return 2;
        }
        return 1;
    }

    // Verifies that the input file begins with the four-byte Annex B start
    // code 00 00 00 01.
    //
    // Allocates ctx->tmp_data (1024 bytes) on first use, reads the first four
    // bytes of ctx->in_file into it and, on success, leaves them there with
    // ctx->tmp_data_size set to 4 so that demux() can prepend them to the
    // first packet.
    //
    // Returns 0 on success, -1 on allocation failure, short read, or when the
    // file does not start with the expected start code.
    static int check_format(nvcuvid_test_ctx * ctx) {
        size_t got;

        if (!ctx->tmp_data) {
            ctx->tmp_data = malloc(1024);
            if (!ctx->tmp_data) {
                printf("malloc tmp data failed\n");
                return -1;
            }
        }
        uint8_t* d = (uint8_t*)ctx->tmp_data;
        // read the first 4 bytes
        got = fread(d, 1, START_CODE_SIZE, ctx->in_file);
        if (got != START_CODE_SIZE) {
            printf("read file failed\n");
            return -1;
        }
        // the first start code must be 0x00 00 00 01
        if (start_code_is_annexb(ctx, d, START_CODE_SIZE - 1) != 2) {
            printf("file format error\n");
            printf("start code:%02x %02x %02x %02x\n", d[0], d[1], d[2], d[3]);
            return -1;
        }
        ctx->tmp_data_size = START_CODE_SIZE;
        printf("start code:%02x %02x %02x %02x\n", d[0], d[1], d[2], d[3]);
        return 0;
    }

    // Opens the input bitstream named by ctx->in_file_name and the output
    // file "<in_file_name>.yuv", records the input size in ctx->in_file_size,
    // and finally validates the input with check_format().
    //
    // Returns -1 when either file cannot be opened; otherwise the result of
    // check_format().
    static int open_file(nvcuvid_test_ctx * ctx) {
        char yuv_path[1024];

        ctx->in_file = fopen(ctx->in_file_name, "rb");
        if (ctx->in_file == NULL) {
            printf("open input file:%s failed\n", ctx->in_file_name);
            return -1;
        }

        // Measure the file by seeking to its end, then rewind.
        fseek(ctx->in_file, 0, SEEK_END);
        ctx->in_file_size = ftell(ctx->in_file);
        fseek(ctx->in_file, 0, SEEK_SET);
        printf("input file size:%d\n", ctx->in_file_size);

        // Output path = input path + ".yuv".
        memset(yuv_path, 0, sizeof(yuv_path));
        snprintf(yuv_path, sizeof(yuv_path), "%s.yuv", ctx->in_file_name);

        ctx->out_file = fopen(yuv_path, "wb+");
        if (ctx->out_file == NULL) {
            printf("open output file:%s failed\n", yuv_path);
            return -1;
        }

        return check_format(ctx);
    }

    // Hex-dumps the first `print_size` bytes of `data` to stdout.
    //
    // A "data size:<all_size>" header line is printed first. Bytes are shown
    // as two lowercase hex digits, grouped in pairs separated by a space,
    // with a line break after every 64 bytes and a final newline.
    static void print_hex(uint8_t * data, int all_size, int print_size) {
        printf("data size:%d\n", all_size);
        for (int idx = 0; idx < print_size; ++idx) {
            // Separator before every even-indexed byte except the first.
            if (idx > 0 && (idx & 1) == 0) {
                printf(" ");
            }
            printf("%02x", data[idx]);
            // idx + 1 is the number of bytes printed so far.
            if ((idx + 1) % 64 == 0) {
                printf("\n");
            }
        }
        printf("\n");
    }

    // Extracts the NAL unit type (low five bits) from a NAL header byte,
    // logs the raw byte and a symbolic name for the type, and returns the
    // numeric type.
    static int nal_type(uint8_t data) {
        static const struct {
            int         id;
            const char* label;
        } known_types[] = {
            {1, "NAL_TYPE_SLICE\n"}, {5, "NAL_TYPE_IDR\n"},
            {6, "NAL_TYPE_SEI\n"},   {7, "NAL_TYPE_SPS\n"},
            {8, "NAL_TYPE_PPS\n"},   {9, "NAL_TYPE_AUD\n"},
            {12, "NAL_TYPE_EOSEQ\n"},
        };
        const int   type  = data & 0x1f;
        const char* label = "NAL_TYPE_UNKNOWN\n";

        printf("nal type:0x%02x\n", data);
        for (size_t i = 0; i < sizeof(known_types) / sizeof(known_types[0]);
             ++i) {
            if (known_types[i].id == type) {
                label = known_types[i].label;
                break;
            }
        }
        fputs(label, stdout);
        return type;
    }

    // Reads the next Annex B NAL unit from ctx->in_file into ctx->host_data
    // and describes it in *pkt.
    //
    // Each packet starts with the start code carried over in ctx->tmp_data
    // (left there by check_format() or by the previous call) and ends just
    // before the next start code, which is moved into ctx->tmp_data for the
    // next call. Packets whose NAL type is 9 are logged and skipped. When no
    // more data is available an end-of-stream packet is produced and
    // ctx->decoder_flushing is set.
    //
    // Returns 0 on success, -1 when the packet buffer cannot be allocated or
    // a NAL unit does not fit into it.
    static int demux(nvcuvid_test_ctx * ctx, CUVIDSOURCEDATAPACKET * pkt) {
        enum { HOST_BUF_SIZE = 1024 * 1024 };
        int      again = 0;
        int      size;
        int      pos = 0;
        int      ch;
        uint8_t* data;
        if (!ctx->host_data) {
            ctx->host_data = (uint8_t*)malloc(HOST_BUF_SIZE);
            if (!ctx->host_data) {
                printf("malloc host data failed\n");
                return -1;
            }
            printf("malloc host data,size:1024*1024\n");
        }

        do {
            size  = 0;
            data  = (uint8_t*)ctx->host_data;
            again = 0;
            // Start the packet with the start code saved by the last call.
            if (ctx->tmp_data && ctx->tmp_data_size > 0) {
                memcpy(data, ctx->tmp_data, ctx->tmp_data_size);
                size = ctx->tmp_data_size;
            }
            pos                = size;  // index of the NAL header byte
            ctx->tmp_data_size = 0;
            while (1) {
                // EOF covers both end-of-file and read errors; checking only
                // feof() would spin forever on a read error.
                ch = fgetc(ctx->in_file);
                if (ch == EOF) {
                    break;
                }
                if (size >= HOST_BUF_SIZE) {
                    printf("nal unit exceeds host buffer size:%d\n",
                           (int)HOST_BUF_SIZE);
                    return -1;
                }
                data[size++] = (uint8_t)ch;
                // The start-code test looks back up to three bytes, so only
                // run it once four bytes are available.
                if (size < 4) {
                    continue;
                }
                int type = start_code_is_annexb(ctx, data, size - 1);
                if (type == 1 && ctx->tmp_data) {
                    ctx->tmp_data_size = 3;
                    memcpy(ctx->tmp_data, data + size - 3, 3);
                    size -= 3;
                    break;
                }
                if (type == 2 && ctx->tmp_data) {
                    ctx->tmp_data_size = 4;
                    memcpy(ctx->tmp_data, data + size - 4, 4);
                    size -= 4;
                    break;
                }
            }

            if (size > 0) {
                pkt->payload_size = size;
                pkt->timestamp    = 0;
                pkt->payload      = (const unsigned char*)ctx->host_data;
                print_hex((uint8_t*)pkt->payload, size, size > 32 ? 32 : size);
                // Inspect the NAL header only when the packet holds more than
                // the start code itself.
                if (pos < size) {
                    int type = nal_type(data[pos]);
                    if (type == 9) {
                        again = 1;
                        printf("sei/aud data, read size:%d, discard.\n", size);
                        printf("---------------------------------\n");
                    }
                }
            } else {
                size       = 0;
                pkt->flags = CUVID_PKT_ENDOFSTREAM | CUVID_PKT_NOTIFY_EOS;
                ctx->decoder_flushing = 1;
                pkt->payload          = NULL;
                pkt->payload_size     = 0;
                pkt->timestamp        = 0;

                printf("end of stream, read size:%d\n", ctx->read_size);
            }
            ctx->read_size += size;
        } while (again);
        return 0;
    }

    // Fetches the next packet from ctx->demuxer and describes it in *pkt.
    //
    // A packet with a positive size is passed through by pointer (no copy)
    // with its timestamp and the CUVID_PKT_TIMESTAMP flag. Otherwise an
    // end-of-stream packet is produced and ctx->decoder_flushing is set.
    // Always returns 0.
    static int demux_ffmpeg(nvcuvid_test_ctx * ctx,
                            CUVIDSOURCEDATAPACKET * pkt) {
        void*   chunk  = NULL;
        int     nbytes = 0;
        int64_t stamp  = 0;

        if (ctx->host_data == NULL) {
            ctx->host_data = (uint8_t*)malloc(1024 * 1024);
            printf("malloc host data,size:1024*1024\n");
        }

        ctx->demuxer->Demux((uint8_t**)&chunk, &nbytes, &stamp);

        // Nothing delivered: signal end of stream to the parser.
        if (nbytes <= 0) {
            pkt->flags            = CUVID_PKT_ENDOFSTREAM;
            ctx->decoder_flushing = 1;
            pkt->payload          = NULL;
            pkt->payload_size     = 0;
            pkt->timestamp        = 0;

            printf("end of stream, read size:%d\n", ctx->read_size);
            return 0;
        }

        ctx->read_size += nbytes;
        pkt->payload_size = nbytes;
        pkt->timestamp    = stamp;
        pkt->flags |= CUVID_PKT_TIMESTAMP;
        // The payload still points into the demuxer's own buffer.
        pkt->payload = (const unsigned char*)chunk;
        return 0;
    }
    // Initialises the CUDA driver API, performs a small test allocation,
    // logs the name of device 0 and the device count, then creates the CUDA
    // context (ctx->cu_ctx) on that device and the video context lock
    // (ctx->cu_ctx_lock).
    //
    // Returns 0 on success, -1 as soon as any step fails.
    static int cu_init(nvcuvid_test_ctx * ctx) {
        int      ret;
        void*    test         = NULL;
        CUdevice cu_device    = 0;
        int      gpu          = 0;  // ordinal of the device to use
        int      nGpu         = 0;
        char     gpu_name[80] = {0};

        ret = cuInit(0);
        if (ret != 0) {
            printf("cuInit failed\n");
            return -1;
        }
        // Smoke test: make sure device memory can be allocated at all.
        ret = cudaMalloc(&test, 1024);
        if (ret != 0) {
            printf("cudaMalloc failed\n");
            return -1;
        }
        cudaFree(test);

        ret = cuDeviceGet(&cu_device, gpu);
        if (ret != 0) {
            printf("cuDeviceGet failed\n");
            return -1;
        }
        // Only print the name when the query succeeded; otherwise the buffer
        // content would be meaningless.
        ret = cuDeviceGetName(gpu_name, sizeof(gpu_name), cu_device);
        if (ret != 0) {
            printf("cuDeviceGetName failed\n");
            return -1;
        }
        printf("GPU in use:%s\n", gpu_name);

        ret = cuDeviceGetCount(&nGpu);
        if (ret != 0) {
            printf("cuDeviceGetCount failed\n");
            return -1;
        }
        printf("cuDeviceGetCount:%d\n", nGpu);

        // Create the context on the device that was queried above rather
        // than on a hard-coded handle.
        ret = cuCtxCreate(&ctx->cu_ctx, 0, cu_device);
        if (ret != 0) {
            printf("createCudaContext failed\n");
            return -1;
        }
        ret = cuvidCtxLockCreate(&ctx->cu_ctx_lock, ctx->cu_ctx);
        if (ret != 0) {
            printf("cuvidCtxLockCreate failed\n");
            return -1;
        }

        return 0;
    }

    // Entry point of the decode test.
    //
    // Usage: <program> <input_file>
    //
    // Opens the input/output files, initialises CUDA, creates an H.264 video
    // parser whose callbacks create the decoder and dump decoded frames, then
    // feeds packets from the FFmpeg demuxer to the parser until the
    // end-of-stream packet has been submitted.
    //
    // Returns 0 when the whole stream was parsed, -1 on any failure.
    int main(int argc, char* argv[]) {
        int               ret;
        int               exit_code = -1;
        nvcuvid_test_ctx* ctx;

        if (argc < 2) {
            printf("Usage: %s <input_file>\n", argv[0]);
            return -1;
        }

        // Zero-initialised so the cleanup code can tell which resources
        // were actually created.
        ctx = (nvcuvid_test_ctx*)calloc(1, sizeof(nvcuvid_test_ctx));
        if (!ctx) {
            printf("alloc context failed\n");
            return -1;
        }
        ctx->in_file_name = argv[1];

        ret = open_file(ctx);
        if (ret != 0) {
            printf("open file failed\n");
            goto error;
        }

        // 1.init cuda
        ret = cu_init(ctx);
        if (ret != 0) goto error;

        // 2.create parser
        ctx->out_pix_fmt = cudaVideoSurfaceFormat_NV12;
        ctx->codec_type  = cudaVideoCodec_H264;

        // fill parser info
        memset(&ctx->cuparseinfo, 0, sizeof(CUVIDPARSERPARAMS));
        ctx->cuparseinfo.CodecType              = ctx->codec_type;
        ctx->cuparseinfo.ulMaxNumDecodeSurfaces = 1;
        ctx->cuparseinfo.ulClockRate            = 1000;
        ctx->cuparseinfo.pUserData              = ctx;
        ctx->cuparseinfo.ulMaxDisplayDelay      = 1;
        ctx->cuparseinfo.pfnSequenceCallback    = handle_video_sequence;
        ctx->cuparseinfo.pfnDecodePicture       = handle_picture_decode;
        ctx->cuparseinfo.pfnDisplayPicture      = handle_picture_display;
        ret = cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo);
        if (ret != 0) {
            printf("cuvidCreateVideoParser failed\n");
            goto error;
        }

        ctx->demuxer = new FFmpegDemuxer(ctx->in_file_name);

        // decode video: one packet per iteration until the demuxer has
        // produced the end-of-stream packet
        while (!ctx->decoder_flushing) {
            CUVIDSOURCEDATAPACKET pkt;
            memset(&pkt, 0, sizeof(CUVIDSOURCEDATAPACKET));
            ret = demux_ffmpeg(ctx, &pkt);
            if (ret != 0) {
                printf("demux packet failed\n");
                goto error;
            }
            // parse data (the callbacks run inside this call)
            print_hex((uint8_t*)pkt.payload, pkt.payload_size,
                      pkt.payload_size > 32 ? 32 : pkt.payload_size);
            ret = cuvidParseVideoData(ctx->cuparser, &pkt);
            if (ret != 0) {
                printf("cuvidParseVideoData failed\n");
                goto error;
            }
            ctx->frame_count++;
            // frame_count is uint64_t; cast so the argument matches %llu.
            printf("parse video pkt[%llu] success\n",
                   (unsigned long long)ctx->frame_count);
            printf("-----------------------\n");
        }
        exit_code = 0;

    error:
        // 3.destroy, in reverse order of creation; every handle is checked
        // because this label is also reached before some were created
        if (ctx->cuparser) cuvidDestroyVideoParser(ctx->cuparser);
        if (ctx->cudecoder) cuvidDestroyDecoder(ctx->cudecoder);
        if (ctx->in_file) fclose(ctx->in_file);
        if (ctx->out_file) fclose(ctx->out_file);
        if (ctx->host_data) free(ctx->host_data);
        if (ctx->tmp_data) free(ctx->tmp_data);
        if (ctx->demuxer) delete ctx->demuxer;
        // The lock refers to the context, so release it first.
        if (ctx->cu_ctx_lock) cuvidCtxLockDestroy(ctx->cu_ctx_lock);
        if (ctx->cu_ctx) cuCtxDestroy(ctx->cu_ctx);
        free(ctx);
        printf("main end.\n");
        return exit_code;
    }
//---------------------------------------------------------------------------
// main.cpp
//
// cuviddec decode sample frontend
//
// Copyright  2008 NVIDIA Corporation. All rights reserved.
//---------------------------------------------------------------------------

#include <nvcuvid.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// #include <platform/NvSystemClock/NvSystemClock.h>

// #if defined WIN32 || defined _WIN32

// #include <windows.h>
// #include <d3d9.h>
// #include <cudad3d9.h>

// #elif defined NV_UNIX

// #include <unistd.h>
// #include <sys/time.h>
// #include <time.h>
// #include <platform/include/NvTypes.h>
// #include <codecs/include/nvutil.h>

// #endif

#define MAX_FRM_CNT 16
#define DISPLAY_DELAY 2  // Attempt to decode up to 2 frames ahead of display
#define USE_ASYNC_COPY 1
#define USE_FLOATING_CONTEXTS 1  // Use floating contexts

// Autolock for floating contexts
// Scope guard around a video context lock. With USE_FLOATING_CONTEXTS enabled
// the constructor calls cuvidCtxLock() and the destructor calls
// cuvidCtxUnlock(); otherwise the object only stores the handle.
class CAutoCtxLock {
   public:
#if USE_FLOATING_CONTEXTS
    CAutoCtxLock(CUvideoctxlock lck) : m_lock(lck) { cuvidCtxLock(m_lock, 0); }
    ~CAutoCtxLock() { cuvidCtxUnlock(m_lock, 0); }
#else
    CAutoCtxLock(CUvideoctxlock lck) : m_lock(lck) {}
#endif

   private:
    CUvideoctxlock m_lock;  // handle passed to the constructor
};

// Per-session decoder state; a pointer to it is received by the parser
// callbacks as pvUserData.
typedef struct {
    // NOTE(review): not referenced in the visible code; presumably the parser
    // handle created by main() - confirm.
    CUvideoparser         cuParser;
    // Decoder handle; (re)created in HandleVideoSequence().
    CUvideodecoder        cuDecoder;
    // Stream used for the asynchronous device-to-host copy in
    // DisplayPicture().
    CUstream              cuStream;
    // Context lock taken through CAutoCtxLock in the callbacks.
    CUvideoctxlock        cuCtxLock;
    // Creation parameters of the current decoder; HandleVideoSequence()
    // compares them with the incoming format to detect a format change.
    CUVIDDECODECREATEINFO dci;
    // NOTE(review): not referenced in the visible code.
    CUVIDPARSERDISPINFO   DisplayQueue[DISPLAY_DELAY];
    // Host buffer allocated with cuMemAllocHost() in DisplayPicture(); receives
    // the raw pitched NV12 frame.
    unsigned char*        pRawNV12;
    // Size in bytes of the pRawNV12 allocation.
    int                   raw_nv12_size;
    // Incremented once per DisplayPicture() call that maps a frame.
    int                   pic_cnt;
    // Output file for the converted frames; nothing is written when NULL.
    FILE*                 fd_yuv;
} DecodeSession;

static int DisplayPicture(DecodeSession*       state,
                          CUVIDPARSERDISPINFO* pPicParams);

//
//
// Parser callbacks
//

// Called when the decoder encounters a video format change (or initial sequence
// header).
//
// (Re)creates the decoder when there is none yet or when the codec, coded size
// or chroma format differs from the current creation parameters. The
// "no decoder" test matters after a failed creation: state->dci then already
// matches the stream, and without it a repeated sequence header would report
// success while no decoder exists.
//
// Returns 1 on success, 0 when the decoder could not be created.
static int CUDAAPI HandleVideoSequence(void*          pvUserData,
                                       CUVIDEOFORMAT* pFormat) {
    DecodeSession* state = (DecodeSession*)pvUserData;

    if ((!state->cuDecoder) || (pFormat->codec != state->dci.CodecType) ||
        (pFormat->coded_width != state->dci.ulWidth) ||
        (pFormat->coded_height != state->dci.ulHeight) ||
        (pFormat->chroma_format != state->dci.ChromaFormat)) {
        CAutoCtxLock lck(state->cuCtxLock);
        if (state->cuDecoder) {
            cuvidDestroyDecoder(state->cuDecoder);
            state->cuDecoder = NULL;
        }
        memset(&state->dci, 0, sizeof(CUVIDDECODECREATEINFO));
        state->dci.ulWidth             = pFormat->coded_width;
        state->dci.ulHeight            = pFormat->coded_height;
        state->dci.ulNumDecodeSurfaces = MAX_FRM_CNT;
        state->dci.CodecType           = pFormat->codec;
        state->dci.ChromaFormat        = pFormat->chroma_format;
        // Output (pass through)
        state->dci.OutputFormat = cudaVideoSurfaceFormat_NV12;
        state->dci.DeinterlaceMode =
            cudaVideoDeinterlaceMode_Weave;  // No deinterlacing
        state->dci.ulTargetWidth       = state->dci.ulWidth;
        state->dci.ulTargetHeight      = state->dci.ulHeight;
        state->dci.ulNumOutputSurfaces = 1;
        // Create the decoder
        if (CUDA_SUCCESS !=
            cuvidCreateDecoder(&state->cuDecoder, &state->dci)) {
            printf("Failed to create video decoder\n");
            // Make sure a later sequence callback retries the creation.
            state->cuDecoder = NULL;
            return 0;
        }
    }
    return 1;
}

// Parser callback that submits a single picture for decoding while holding the
// session's context lock. A negative picture index is rejected up front.
// Returns 1 when cuvidDecodePicture() succeeds and 0 otherwise.
static int CUDAAPI HandlePictureDecode(void*           pvUserData,
                                       CUVIDPICPARAMS* pPicParams) {
    DecodeSession* session = static_cast<DecodeSession*>(pvUserData);
    CAutoCtxLock   guard(session->cuCtxLock);

    // Should never happen
    if (pPicParams->CurrPicIdx < 0) {
        printf("Invalid picture index\n");
        return 0;
    }

    const CUresult status = cuvidDecodePicture(session->cuDecoder, pPicParams);
    if (status == CUDA_SUCCESS) {
        return 1;
    }
    printf("cuvidDecodePicture: %d\n", status);
    return 0;
}

// Parser callback for a frame that is ready for display (for field pictures
// two decode calls may precede one display call, since two fields make up one
// frame). Logs the call, forwards the picture to DisplayPicture() and always
// reports success to the parser.
static int CUDAAPI HandlePictureDisplay(void*                pvUserData,
                                        CUVIDPARSERDISPINFO* pPicParams) {
    printf("HandlePictureDisplay>>>>>\n");
    // The result of DisplayPicture() is intentionally not propagated.
    (void)DisplayPicture(static_cast<DecodeSession*>(pvUserData), pPicParams);
    return 1;
}

// Maps a decoded picture, copies the pitched NV12 surface into a host buffer
// and, when state->fd_yuv is set, converts it to planar IYUV (Y, then U, then
// V) and appends it to that file.
//
// The host buffer (state->pRawNV12) is allocated on first use and reallocated
// when a larger frame arrives. The whole function runs under the session's
// context lock.
//
// Returns 0 when the frame cannot be mapped, 1 otherwise. Allocation or copy
// failures are logged and the frame is skipped (nothing is written).
static int DisplayPicture(DecodeSession*       state,
                          CUVIDPARSERDISPINFO* pPicParams) {
    CAutoCtxLock    lck(state->cuCtxLock);
    CUVIDPROCPARAMS vpp;
    CUdeviceptr     devPtr = 0;
    CUresult        result;
    unsigned int    pitch = 0, w, h;
    int             nv12_size;
    bool            have_frame = false;  // host buffer holds a complete frame

    memset(&vpp, 0, sizeof(vpp));
    vpp.progressive_frame = pPicParams->progressive_frame;
    vpp.top_field_first   = pPicParams->top_field_first;
    result = cuvidMapVideoFrame(state->cuDecoder, pPicParams->picture_index,
                                &devPtr, &pitch, &vpp);
    if (result != CUDA_SUCCESS) {
        printf("cuvidMapVideoFrame: %d\n", result);
        return 0;
    }
    w         = state->dci.ulTargetWidth;
    h         = state->dci.ulTargetHeight;
    nv12_size = pitch * (h + h / 2);  // 12bpp
    if ((!state->pRawNV12) || (nv12_size > state->raw_nv12_size)) {
        state->raw_nv12_size = 0;
        if (state->pRawNV12) {
            cuMemFreeHost(state->pRawNV12);  // Just to be safe (the pitch
                                             // should be constant)
            state->pRawNV12 = NULL;
        }
        result = cuMemAllocHost((void**)&state->pRawNV12, nv12_size);
        if (result != CUDA_SUCCESS) {
            printf("cuMemAllocHost failed to allocate %d bytes (%d)\n",
                   nv12_size, result);
            // Leave the session in the "no buffer" state so the next frame
            // retries the allocation.
            state->pRawNV12 = NULL;
        } else {
            state->raw_nv12_size = nv12_size;
        }
    }
    if (state->pRawNV12) {
#if USE_ASYNC_COPY
        result = cuMemcpyDtoHAsync(state->pRawNV12, devPtr, nv12_size,
                                   state->cuStream);
        if (result != CUDA_SUCCESS) {
            printf("cuMemcpyDtoHAsync: %d\n", result);
        } else {
            // Block until the copy has finished; this replaces polling
            // cuStreamQuery() with one-second sleeps.
            result = cuStreamSynchronize(state->cuStream);
            if (result != CUDA_SUCCESS)
                printf("cuStreamSynchronize: %d\n", result);
        }
#else
        result = cuMemcpyDtoH(state->pRawNV12, devPtr, nv12_size);
        if (result != CUDA_SUCCESS) printf("cuMemcpyDtoH: %d\n", result);
#endif
        have_frame = (result == CUDA_SUCCESS);
    }
    cuvidUnmapVideoFrame(state->cuDecoder, devPtr);
    // Convert the output to standard IYUV and dump it to disk (note: very
    // inefficient)
    if ((state->fd_yuv) && have_frame) {
        const unsigned int   chroma_w = w / 2;
        const unsigned int   chroma_h = h / 2;
        const size_t         luma_sz  = (size_t)w * h;
        const size_t         total_sz = luma_sz + (size_t)w * chroma_h;
        const unsigned char* p        = state->pRawNV12;
        unsigned char*       iyuv     = new unsigned char[total_sz];
        unsigned char*       u_plane  = iyuv + luma_sz;
        unsigned char*       v_plane  = u_plane + (size_t)chroma_h * chroma_w;

        // Copy luma, dropping the pitch padding of every row
        for (unsigned int y = 0; y < h; y++) {
            memcpy(iyuv + (size_t)y * w, p + (size_t)y * pitch, w);
        }
        // De-interleave chroma (NV12 stored as U,V,U,V,...)
        p += (size_t)h * pitch;
        for (unsigned int y = 0; y < chroma_h; y++) {
            const unsigned char* src = p + (size_t)y * pitch;
            for (unsigned int x = 0; x < chroma_w; x++) {
                u_plane[(size_t)y * chroma_w + x] = src[x * 2];
                v_plane[(size_t)y * chroma_w + x] = src[x * 2 + 1];
            }
        }
        fwrite(iyuv, 1, total_sz, state->fd_yuv);
        delete[] iyuv;  // allocated with new[]
    }
    state->pic_cnt++;
    return 1;
}

//
//
// CUDA 2.0 initialization
//

#if defined WIN32 || defined _WIN32
// Direct3D objects (Windows builds only); released in DeinitCuda().
IDirect3D9*                  g_pD3D   = NULL;
IDirect3DDevice9*            g_pD3Dev = NULL;
#endif

// Process-wide CUDA state shared by InitCuda()/DeinitCuda():
//   g_cuContext       - context created by InitCuda(), destroyed by
//                       DeinitCuda().
//   g_cuDevice        - device the context is created on.
//   g_cuInstanceCount - reference count; InitCuda() increments it when it is
//                       already non-zero, DeinitCuda() decrements it.
//   g_cuCtxLock       - lock handed out through InitCuda()'s pLock argument
//                       and destroyed by DeinitCuda().
CUcontext      g_cuContext       = NULL;
CUdevice       g_cuDevice        = 0;
int            g_cuInstanceCount = 0;
CUvideoctxlock g_cuCtxLock       = NULL;

/**
 * Creates the process-wide CUDA context, or takes another reference to it
 * when it has already been created.
 *
 * The context is created on the first device for which cuCtxCreate()
 * succeeds. Every successful call must be balanced by one DeinitCuda() call.
 *
 * @param pLock  Receives the shared context-lock handle (g_cuCtxLock). May be
 *               NULL when the caller does not need it. This function does not
 *               create the lock itself, so the handle is whatever g_cuCtxLock
 *               currently holds (NULL unless it is set elsewhere).
 * @return true if a context is available; false if cuInit(), device
 *         enumeration, or context creation on every device failed.
 */
static bool InitCuda(CUvideoctxlock* pLock) {
    CUresult err;
    int      lAdapterCount = 0;

    // Already initialised: hand out the existing state and add a reference.
    if (g_cuInstanceCount != 0) {
        g_cuInstanceCount++;
        if (pLock) *pLock = g_cuCtxLock;
        return true;
    }

    err = cuInit(0);
    if (err != CUDA_SUCCESS) {
        printf("ERROR: cuInit failed (%d)\n", err);
        return false;
    }

    err = cuDeviceGetCount(&lAdapterCount);
    if ((err != CUDA_SUCCESS) || (lAdapterCount <= 0)) {
        printf("Failed to find a CUDA 2.0 device (%d)\n", err);
        return false;
    }

    const unsigned int ctx_flags = 0x00;
    // Iterate with a local ordinal so that g_cuDevice is only updated once a
    // context really exists and is never left pointing past the last device.
    for (int dev = 0; dev < lAdapterCount; dev++) {
        err = cuCtxCreate(&g_cuContext, ctx_flags, dev);
        if (err == CUDA_SUCCESS) {
            g_cuDevice = dev;
            // Record the first reference; without this a second InitCuda()
            // call would create (and leak) another context.
            g_cuInstanceCount = 1;
            if (pLock) *pLock = g_cuCtxLock;
            return true;
        }
    }
    printf("Failed to create CUDA context (%d)\n", err);

    return false;
}

static bool DeinitCuda() {
    if (g_cuInstanceCount > 0) {
        if (--g_cuInstanceCount != 0) {
            return true;
        }
    }
#if USE_FLOATING_CONTEXTS
    if (g_cuCtxLock) {
        cuvidCtxLockDestroy(g_cuCtxLock);
        g_cuCtxLock = NULL;
    }
#endif
    if (g_cuContext) {
        CUresult err = cuCtxDestroy(g_cuContext);
        if (err != CUDA_SUCCESS)
            printf("WARNING: cuCtxDestroy failed (%d)\n", err);
        g_cuContext = NULL;
    }

#if defined WIN32 || defined _WIN32
    if (g_pD3Dev) {
        g_pD3Dev->Release();
        g_pD3Dev = NULL;
    }
    if (g_pD3D) {
        g_pD3D->Release();
        g_pD3D = NULL;
    }
#endif

    return true;
}

/**
 * Command-line entry point: feeds an elementary video stream to the NVCUVID
 * parser and lets the parser callbacks decode it (and optionally dump it).
 *
 * Arguments are positional: argv[1] is the input file, argv[2] (optional) the
 * output file, and any further argument must be a codec name (mpeg1, mpeg2,
 * mpeg4, vc1, h264, jpeg; case-insensitive). The default codec is H.264.
 * Because the first two arguments are always taken as file names, a codec
 * name is only recognised from the third argument onward.
 *
 * @return 0 when the whole input was read and parsed without error,
 *         1 on a usage error or any failure.
 */
int main(int argc, char* argv[]) {
    unsigned char       io_buffer[16 * 1024];
    CUVIDPARSERPARAMS   parserInitParams;
    DecodeSession       state;
    CUresult            result;
    char*               arg_input  = NULL;
    char*               arg_output = NULL;
    FILE*               fInput     = NULL;
    int                 retval     = 1;
    // Declared up front so that no `goto exit` jumps over its initialisation.
    bool                decode_ok  = true;
    int                 i;
    cudaVideoCodec_enum codec = cudaVideoCodec_H264;

    if (argc < 2) {
        fprintf(stderr, "cuh264dec input.264  [output.yuv]\n");
        return retval;
    }

    for (i = 1; i < argc; i++) {
        if (!arg_input)
            arg_input = argv[i];
        else if (!arg_output)
            arg_output = argv[i];
        else if (!strcasecmp(argv[i], "mpeg1"))
            codec = cudaVideoCodec_MPEG1;
        else if (!strcasecmp(argv[i], "mpeg2"))
            codec = cudaVideoCodec_MPEG2;
        else if (!strcasecmp(argv[i], "mpeg4"))
            codec = cudaVideoCodec_MPEG4;
        else if (!strcasecmp(argv[i], "vc1"))
            codec = cudaVideoCodec_VC1;
        else if (!strcasecmp(argv[i], "h264"))
            codec = cudaVideoCodec_H264;
        else if (!strcasecmp(argv[i], "jpeg"))
            codec = cudaVideoCodec_JPEG;
        else {
            fprintf(stderr, "invalid parameter\n");
            return retval;
        }
    }

    if (!arg_input) {
        fprintf(stderr, "input file not specified\n");
        return retval;
    }
    // Zero the session so the cleanup code at `exit` can test every handle.
    memset(&state, 0, sizeof(state));

#if defined WIN32 || defined _WIN32
    timeBeginPeriod(1);
#endif

    // Create (or reference) the shared CUDA context
    if (!InitCuda(&state.cuCtxLock)) {
        printf("Failed to initialize CUDA 2.0\n");
        goto exit;
    }
    // Create video parser; the callbacks receive &state as user data
    memset(&parserInitParams, 0, sizeof(parserInitParams));
    parserInitParams.CodecType              = codec;
    parserInitParams.ulMaxNumDecodeSurfaces = MAX_FRM_CNT;
    parserInitParams.ulMaxDisplayDelay      = DISPLAY_DELAY;
    parserInitParams.pUserData              = &state;
    parserInitParams.pfnSequenceCallback    = HandleVideoSequence;
    parserInitParams.pfnDecodePicture       = HandlePictureDecode;
    parserInitParams.pfnDisplayPicture      = HandlePictureDisplay;
    result = cuvidCreateVideoParser(&state.cuParser, &parserInitParams);
    if (result != CUDA_SUCCESS) {
        printf("Failed to create video parser (%d)\n", result);
        goto exit;
    }
    {
        CAutoCtxLock lck(state.cuCtxLock);
        result = cuStreamCreate(&state.cuStream, 0);
        if (result != CUDA_SUCCESS) {
            printf("cuStreamCreate failed (%d)\n", result);
            goto exit;
        }
    }
    // Open input file
    fInput = fopen(arg_input, "rb");
    if (fInput == NULL) {
        printf("Failed to open \"%s\"\n", arg_input);
        goto exit;
    }
    // Open output file
    if (arg_output) {
        state.fd_yuv = fopen(arg_output, "wb");
        if (state.fd_yuv == NULL) {
            printf("Failed to create \"%s\"\n", arg_output);
            goto exit;
        }
    }
    // Start decoding
    printf("Decoding...\n");

    // elapsed_time = NvGetSystemClockMicrosecs() / 1000;

    for (;;) {
        CUVIDSOURCEDATAPACKET pkt;
        int        len    = fread(io_buffer, 1, sizeof(io_buffer), fInput);
        const bool at_end = (len <= 0);

        if (at_end) {
            // Nothing more to read: send an empty end-of-stream packet so the
            // parser flushes its pending pictures.
            pkt.flags        = CUVID_PKT_ENDOFSTREAM;
            pkt.payload_size = 0;
            pkt.payload      = NULL;
        } else {
            pkt.flags        = 0;
            pkt.payload_size = len;
            pkt.payload      = io_buffer;
        }
        pkt.timestamp = 0;  // not using timestamps

        result = cuvidParseVideoData(state.cuParser, &pkt);
        if (result != CUDA_SUCCESS) {
            // Stop feeding data once the parser reports a failure.
            printf("cuvidParseVideoData failed (%d)\n", result);
            decode_ok = false;
            break;
        }
        if (at_end) break;
    }

    // fread() returns a short count for both EOF and I/O errors; tell them
    // apart so a truncated read is not reported as success.
    if (ferror(fInput)) {
        printf("Error while reading \"%s\"\n", arg_input);
        decode_ok = false;
    }
    if (decode_ok) retval = 0;

    // Timing report, disabled because NvGetSystemClockMicrosecs() is not
    // available here; re-declare `int elapsed_time` when enabling it again.
    // elapsed_time = NvGetSystemClockMicrosecs() / 1000 - elapsed_time;
    // printf("Processed %d frames in %dms (%5.2ffps)\n",
    //     state.pic_cnt, elapsed_time,
    //     ((float)state.pic_cnt*1000.0/(float)elapsed_time));
exit:
    if (fInput) {
        fclose(fInput);
        fInput = NULL;
    }
    if (state.fd_yuv) {
        fclose(state.fd_yuv);
        state.fd_yuv = NULL;
    }
    // Delete all created objects
    if (state.cuParser != NULL) {
        cuvidDestroyVideoParser(state.cuParser);
        state.cuParser = NULL;
    }
    if (state.cuDecoder != NULL) {
        CAutoCtxLock lck(state.cuCtxLock);
        cuvidDestroyDecoder(state.cuDecoder);
        state.cuDecoder = NULL;
    }
    if (state.cuStream != NULL) {
        CAutoCtxLock lck(state.cuCtxLock);
        cuStreamDestroy(state.cuStream);
        state.cuStream = NULL;
    }
    if (state.pRawNV12) {
        cuMemFreeHost(state.pRawNV12);
        state.pRawNV12 = NULL;
    }
    DeinitCuda();
#if defined WIN32 || defined _WIN32
    timeEndPeriod(1);
#endif
    return retval;
}

下面是对应的 CMakeLists.txt(CMake 构建脚本):

cmake_minimum_required(VERSION 3.14)

project(nvcodec_test_case)
set(CMAKE_CXX_STANDARD 14)

# Select the build mode at configure time:
#   cmake .. -DCMAKE_BUILD_TYPE=Debug    or    cmake .. -DCMAKE_BUILD_TYPE=Release
# Anything other than "Debug" takes the Release branch below.
if(CMAKE_BUILD_TYPE AND(CMAKE_BUILD_TYPE STREQUAL "Debug"))
    # C flags take $ENV{CFLAGS}, C++ flags take $ENV{CXXFLAGS}.
    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} $ENV{CFLAGS}  -O0 -Wall -g")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} $ENV{CXXFLAGS}  -O0 -Wall -g")
    message("Debug mode:${CMAKE_C_FLAGS_DEBUG}")
    message("Debug mode:${CMAKE_CXX_FLAGS_DEBUG}")
else()
    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wall -O3 -fPIC")
    # All sources in this project are C++, so the C++ flags must be set too;
    # otherwise the optimisation/warning options above never reach them.
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -O3 -fPIC")
    message("Release mode:${CMAKE_C_FLAGS_RELEASE}")
    message("Release mode:${CMAKE_CXX_FLAGS_RELEASE}")
endif()

# Use the gold linker for shared libraries and executables.
# CMAKE_STATIC_LINKER_FLAGS is intentionally left alone: those flags are passed
# to the archiver that creates static libraries, which does not accept
# -fuse-ld.
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold")

# Output locations inside the build tree
set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin/)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/)

# Third-party headers and libraries (NVCUVID, CUDA 12.4, FFmpeg)
set(FFMPEG_LIBRARIES avformat avcodec avutil swscale avdevice swresample pthread m lzma bz2 z)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/3rd/nvcuvid/include /usr/local/cuda-12.4/targets/x86_64-linux/include/ )
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/3rd/nvcuvid/lib/x86_64 /usr/local/cuda-12.4/targets/x86_64-linux/lib/)

include_directories(${CMAKE_CURRENT_SOURCE_DIR}/3rd/ffmpeg/include)
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/3rd/ffmpeg/lib)

# Print the effective include and link directories to ease troubleshooting
get_property(inc_dirs DIRECTORY PROPERTY INCLUDE_DIRECTORIES)

foreach(dir ${inc_dirs})
    message(STATUS "include dir=${dir}")
endforeach()

get_property(lib_dirs DIRECTORY PROPERTY LINK_DIRECTORIES)

foreach(dir ${lib_dirs})
    message(STATUS "lib dir=${dir}")
endforeach()

link_directories(/opt/tops/lib)

# Helper sources shared by both executables
set(UTILS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/utils/demuxer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/utils/log.cpp)

# cuh264dec
add_executable(cuh264dec src/cuh264dec.cpp ${UTILS_FILES})
target_include_directories(cuh264dec PUBLIC ${PROJECT_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/include)
target_link_libraries(cuh264dec PUBLIC nvcuvid cudart cuda ${FFMPEG_LIBRARIES})

# nvcuvid_test
add_executable(nvcuvid_test src/nvcuvid_test.cpp ${UTILS_FILES})
target_include_directories(nvcuvid_test PUBLIC ${PROJECT_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/include)
target_link_libraries(nvcuvid_test PUBLIC nvcuvid cudart cuda ${FFMPEG_LIBRARIES})


  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值