Speex源码分析

最新推荐文章于 2024-09-08 17:45:50 发布

weixin_33729196

最新推荐文章于 2024-09-08 17:45:50 发布

阅读量290

点赞数

原文链接：http://www.cnblogs.com/wangtianxj/archive/2011/09/23/2186170.html

版权

/** Bit-packing data structure representing (part of) a bit-stream. */(speex_bits.h-- 49)
typedef struct SpeexBits {
    char *chars;    /**< "raw" data */
    int   nbBits;   /**< Total number of bits stored in the stream */
    int   charPtr; /**< Position of the byte "cursor" */
    int   bitPtr;   /**< Position of the bit "cursor" within the current char */
    int   owner;    /**< Does the struct "own" the "raw" buffer (member "chars") */
    int   overflow; /**< Set to one if we try to read past the valid data */
    int   buf_size; /**< Allocated size for buffer */
    int   reserved1; /**< Reserved for future use */
    void *reserved2; /**< Reserved for future use */
} SpeexBits;

/**
 * Initialise a SpeexBits with a freshly allocated buffer.
 *
 * Allocates MAX_CHARS_PER_FRAME units through speex_alloc(), records that
 * size in buf_size, marks the struct as owner of the buffer, and then calls
 * speex_bits_reset().
 *
 * If the allocation fails the function returns immediately: only `chars`
 * has been written at that point; buf_size and owner are not assigned and
 * speex_bits_reset() is not called.
 *
 * @param bits Struct to initialise.
 */
EXPORT void speex_bits_init(SpeexBits *bits) (bits.c-- 48)
{
   bits->chars = ( char*)speex_alloc(MAX_CHARS_PER_FRAME);
    /* Allocation failed: bail out without touching the remaining fields. */
    if (!bits->chars)
       return;

   bits->buf_size = MAX_CHARS_PER_FRAME;

   /* The buffer was allocated here, so this struct is recorded as its owner. */
   bits->owner= 1;

   speex_bits_reset(bits);
}

enc_state = speex_encoder_init(&speex_nb_mode);

/** Parameter set describing a narrowband mode (instantiated by nb_mode). */
typedef struct SpeexNBMode {  (modes.h-- 117)
    int     frameSize;       /**< Size of frames used for encoding */
    int     subframeSize;    /**< Size of sub-frames used for encoding */
    int     lpcSize;         /**< Order of LPC filter */
    int     pitchStart;      /**< Smallest pitch value allowed */
    int     pitchEnd;        /**< Largest pitch value allowed */

   spx_word16_t gamma1;     /**< Perceptual filter parameter #1 */
   spx_word16_t gamma2;     /**< Perceptual filter parameter #2 */
   spx_word16_t   lpc_floor;       /**< Noise floor for LPC analysis */

    const SpeexSubmode *submodes[NB_SUBMODES]; /**< Sub-mode data for the mode */
    int     defaultSubmode; /**< Default sub-mode to use when encoding */
    int     quality_map[ 11]; /**< Mode corresponding to each quality setting */
} SpeexNBMode;

/* Default mode for narrowband */ (modes.c --- 320)
static const SpeexNBMode nb_mode = {
    160,     /* frameSize */
    40,      /* subframeSize */
    10,      /* lpcSize */
    17,      /* pitchStart */
    144,     /* pitchEnd */
#ifdef FIXED_POINT
    29491, 19661, /* gamma1, gamma2 */
#else
    0.9, 0.6, /* gamma1, gamma2 */
#endif
   QCONST16(. 0002, 15), /* lpc_floor */
   {NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7,
   &nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
    5,
   { 1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}
};

/* Default mode for narrowband */ (modes.c --- 340)
EXPORT const SpeexMode speex_nb_mode = {
   &nb_mode,
   nb_mode_query,
    " narrowband ",
    0,
    4,
   &nb_encoder_init,
   &nb_encoder_destroy,   (nb_celp.c)
   &nb_encode,
   &nb_decoder_init,
   &nb_decoder_destroy,
   &nb_decode,
   &nb_encoder_ctl,
   &nb_decoder_ctl,
};

/** Struct defining a Speex mode */     (speex.h-- 248)
typedef struct SpeexMode {
    /** Pointer to the low-level mode data */
    const void *mode;

    /** Pointer to the mode query function */
   mode_query_func query;

    /** The name of the mode (you should not rely on this to identify the mode) */
    const char *modeName;

    /** ID of the mode */
    int modeID;

    /** Version number of the bitstream (incremented every time we break
        bitstream compatibility) */
    int bitstream_version;

    /** Pointer to encoder initialization function */
   encoder_init_func enc_init;

    /** Pointer to encoder destruction function */
   encoder_destroy_func enc_destroy;

    /** Pointer to frame encoding function */
   encode_func enc;

    /** Pointer to decoder initialization function */
   decoder_init_func dec_init;

    /** Pointer to decoder destruction function */
   decoder_destroy_func dec_destroy;

    /** Pointer to frame decoding function */
   decode_func dec;

    /** ioctl-like requests for encoder */
   encoder_ctl_func enc_ctl;

    /** ioctl-like requests for decoder */
   decoder_ctl_func dec_ctl;

} SpeexMode;

/** Structure representing the full state of the narrowband encoder */(nb_celp.h-- 49)
typedef struct EncState {
    const SpeexMode *mode;         /**< Mode corresponding to the state */
    int    first;                  /**< Is this the first frame? */
    int    frameSize;              /**< Size of frames */
    int    subframeSize;           /**< Size of sub-frames */
    int    nbSubframes;            /**< Number of sub-frames */
    int    windowSize;             /**< Analysis (LPC) window length */
    int    lpcSize;                /**< LPC order */
    int    min_pitch;              /**< Minimum pitch value allowed */
    int    max_pitch;              /**< Maximum pitch value allowed */

   spx_word32_t cumul_gain;       /**< Product of previously used pitch gains (Q10) */
    int    bounded_pitch;          /**< Next frame should not rely on previous frames for pitch */
    int    ol_pitch;               /**< Open-loop pitch */
    int    ol_voiced;              /**< Open-loop voiced/non-voiced decision */
    int   *pitch;

    /* Fields below exist only when VORBIS_PSYCHO is defined. */
#ifdef VORBIS_PSYCHO
   VorbisPsy *psy;
    float *psy_window;
    float *curve;
    float *old_curve;
#endif

   spx_word16_t  gamma1;          /**< Perceptual filter: A(z/gamma1) */
   spx_word16_t  gamma2;          /**< Perceptual filter: A(z/gamma2) */
   spx_word16_t  lpc_floor;       /**< Noise floor multiplier for A[0] in LPC analysis */
    char  *stack;                  /**< Pseudo-stack allocation for temporary memory */
   spx_word16_t *winBuf;          /**< Input buffer (original signal) */
   spx_word16_t *excBuf;          /**< Excitation buffer */
   spx_word16_t *exc;             /**< Start of excitation frame */
   spx_word16_t *swBuf;           /**< Weighted signal buffer */
   spx_word16_t *sw;              /**< Start of weighted signal frame */
    const spx_word16_t *window;    /**< Temporary (Hanning) window */
    const spx_word16_t *lagWindow;       /**< Window applied to auto-correlation */
   spx_lsp_t *old_lsp;            /**< LSPs for previous frame */
   spx_lsp_t *old_qlsp;           /**< Quantized LSPs for previous frame */
   spx_mem_t *mem_sp;             /**< Filter memory for signal synthesis */
   spx_mem_t *mem_sw;             /**< Filter memory for perceptually-weighted signal */
   spx_mem_t *mem_sw_whole;       /**< Filter memory for perceptually-weighted signal (whole frame) */
   spx_mem_t *mem_exc;            /**< Filter memory for excitation (whole frame) */
   spx_mem_t *mem_exc2;           /**< Filter memory for excitation (whole frame) */
   spx_mem_t mem_hp[ 2];           /**< High-pass filter memory */
   spx_word32_t *pi_gain;         /**< Gain of LPC filter at theta=pi (fe/2) */
   spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */

    /* Fields below are compiled out when DISABLE_VBR is defined. */
#ifndef DISABLE_VBR
   VBRState *vbr;                 /**< State of the VBR data */
    float  vbr_quality;            /**< Quality setting for VBR encoding */
    float  relative_quality;       /**< Relative quality that will be needed by VBR */
   spx_int32_t vbr_enabled;       /**< 1 for enabling VBR, 0 otherwise */
   spx_int32_t vbr_max;           /**< Max bit-rate allowed in VBR mode */
    int    vad_enabled;            /**< 1 for enabling VAD, 0 otherwise */
    int    dtx_enabled;            /**< 1 for enabling DTX, 0 otherwise */
    int    dtx_count;              /**< Number of consecutive DTX frames */
   spx_int32_t abr_enabled;       /**< ABR setting (in bps), 0 if off */
    /* NOTE(review): the three abr_* floats are undocumented in this excerpt;
       presumably ABR bookkeeping -- confirm against the encoder code. */
    float  abr_drift;
    float  abr_drift2;
    float  abr_count;
#endif /* #ifndef DISABLE_VBR */

    int    complexity;             /**< Complexity setting (0-10 from least complex to most complex) */
    /* NOTE(review): sampling_rate, plc_tuning and encode_submode carry no
       description in this excerpt -- consult the encoder code before relying on them. */
   spx_int32_t sampling_rate;
    int    plc_tuning;
    int    encode_submode;
    const SpeexSubmode * const *submodes; /**< Sub-mode data */
    int    submodeID;              /**< Activated sub-mode */
    int    submodeSelect;          /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
    int    isWideband;             /**< Is this used as part of the embedded wideband codec */
    int    highpass_enabled;         /**< Is the input filter enabled */
} EncState;

speex_encoder_ctl(enc_state,SPEEX_SET_QUALITY,&quality);(sb_celp.c-- 1196)

st->submodeSelect = st->submodeID = (( const SpeexSBMode*)(st->mode->mode))->quality_map[quality];
quality_map[quality] 默认值：{ 1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}

st->submodeSelect： /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
st->submodeID： /**< Activated sub-mode */

/** Struct defining the encoding/decoding mode for SB-CELP (wideband) */
typedef struct SpeexSBMode {
    const SpeexMode *nb_mode;     /**< Embedded narrowband mode */
    int     frameSize;      /**< Size of frames used for encoding */
    int     subframeSize;   /**< Size of sub-frames used for encoding */
    int     lpcSize;        /**< Order of LPC filter */
   spx_word16_t gamma1;    /**< Perceptual filter parameter #1 */
   spx_word16_t gamma2;    /**< Perceptual filter parameter #2 */
   spx_word16_t   lpc_floor;      /**< Noise floor for LPC analysis */
   spx_word16_t   folding_gain;

    const SpeexSubmode *submodes[SB_SUBMODES]; /**< Sub-mode data for the mode */
    int     defaultSubmode; /**< Default sub-mode to use when encoding */
    int     low_quality_map[ 11]; /**< Mode corresponding to each quality setting */
    int     quality_map[ 11]; /**< Mode corresponding to each quality setting */
    /* vbr_thresh is only present when VBR support is compiled in. */
#ifndef DISABLE_VBR
    const float (*vbr_thresh)[ 11];
#endif
    int     nb_modes;
} SpeexSBMode;

quality与bit-rate对照表：
mode quality bit-rate mflops quality/description

1 0 2,150 6 Vocoder (mostly for comfort noise)
2 2 5,950 9 Very noticeable artifacts/noise, good intelligibility
3 3-4 8,000 10 Artifacts/noise sometimes noticeable
4 5-6 11,000 14 Artifacts usually noticeable only with headphones
5 7-8 15,000 11 Need good headphones to tell the difference
6 9 18,200 17.5 Hard to tell the difference even with good headphones
7 10 24,600 14.5 Completely transparent for voice, good quality music
8 1 3,950 10.5

speex_encode_int(enc_state, input_frame, &bits);

/**
 * Encode one frame supplied as 16-bit integer samples.
 *
 * Queries the frame size N with SPEEX_GET_FRAME_SIZE, copies N samples from
 * `in` into a local float buffer, then calls the `enc` function pointer of
 * the SpeexMode whose address is stored at the start of `state`.
 *
 * @param state Encoder state; its first member is read as a SpeexMode pointer.
 * @param in    Input samples; N of them are read.
 * @param bits  Bit-stream passed through to the mode's `enc` function.
 * @return Whatever the mode's `enc` function returns.
 */
EXPORT int speex_encode_int( void *state, spx_int16_t * in, SpeexBits *bits)
{
    int i;
   spx_int32_t N;
    float float_in[MAX_IN_SAMPLES];
   speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &N);
    /* Widen each sample to float.
       NOTE(review): nothing here checks N against MAX_IN_SAMPLES -- confirm
       the frame size is bounded elsewhere. */
    for (i= 0;i<N;i++)
      float_in[i] = in[i];
    /* Dispatch through the mode's enc pointer; for speex_nb_mode that slot
       holds nb_encode (the article cites nb_celp.c, line 252). */
    return (*((SpeexMode**)state))->enc(state, float_in, bits); // nb_encode   (nb_celp.c--252)
}

nbBytes = speex_bits_write(&bits, byte_ptr, MAX_NB_BYTES);

/**
 * Copy the contents of a bit-stream into a caller-supplied buffer.
 *
 * A terminator is inserted via speex_bits_insert_terminator(), after which
 * bitPtr, charPtr and nbBits are restored to their values on entry. The
 * number of elements copied is the smaller of max_nbytes/BYTES_PER_CHAR and
 * (nbBits + BITS_PER_CHAR - 1) >> LOG2_BITS_PER_CHAR.
 *
 * @param bits       Bit-stream to read from.
 * @param chars      Destination buffer.
 * @param max_nbytes Capacity of the destination, in bytes.
 * @return Number of elements copied multiplied by BYTES_PER_CHAR.
 */
EXPORT int speex_bits_write(SpeexBits *bits, char *chars, int max_nbytes)    (bits.-- 188)
{
    int i;
    int max_nchars = max_nbytes/BYTES_PER_CHAR;
    int charPtr, bitPtr, nbBits;

    /* Insert terminator, but save the data so we can put it back after */
   bitPtr=bits->bitPtr;
   charPtr=bits->charPtr;
   nbBits=bits->nbBits;
   speex_bits_insert_terminator(bits);
   bits->bitPtr=bitPtr;
   bits->charPtr=charPtr;
   bits->nbBits=nbBits;

    /* Clamp to the number of chars covered by nbBits (the restored, pre-terminator
       count). Presumably a round-up division, assuming
       BITS_PER_CHAR == 1 << LOG2_BITS_PER_CHAR -- confirm. */
    if (max_nchars > ((bits->nbBits+BITS_PER_CHAR- 1)>>LOG2_BITS_PER_CHAR))
      max_nchars = ((bits->nbBits+BITS_PER_CHAR- 1)>>LOG2_BITS_PER_CHAR);

    /* Each element passes through HTOLS on its way out (macro defined elsewhere). */
    for (i= 0;i<max_nchars;i++)
      chars[i]=HTOLS(bits->chars[i]);
    return max_nchars*BYTES_PER_CHAR;
}

转载于:https://www.cnblogs.com/wangtianxj/archive/2011/09/23/2186170.html