/*
* Bit-packing data structure representing (part of) a bit-stream.
* (speex_bits.h, line 49)
*/
typedef struct SpeexBits {
char *chars; /**< "raw" data */
int nbBits; /**< Total number of bits stored in the stream */
int charPtr; /**< Position of the byte "cursor" */
int bitPtr; /**< Position of the bit "cursor" within the current char */
int owner; /**< Does the struct "own" the "raw" buffer (member "chars") */
int overflow; /**< Set to one if we try to read past the valid data */
int buf_size; /**< Allocated size for buffer */
int reserved1; /**< Reserved for future use */
void *reserved2; /**< Reserved for future use */
} SpeexBits;
/**
 * Initialise a SpeexBits with a newly allocated buffer (bits.c, line 48).
 *
 * Requests MAX_CHARS_PER_FRAME chars from speex_alloc(). When the allocation
 * fails the function returns at once: bits->chars is NULL and no other field
 * has been written. Otherwise it records the buffer size, marks the struct as
 * owner of the buffer and calls speex_bits_reset().
 *
 * @param bits Structure to initialise.
 */
EXPORT void speex_bits_init(SpeexBits *bits)
{
   bits->chars = (char*)speex_alloc(MAX_CHARS_PER_FRAME);
   if (!bits->chars)
      return;

   bits->buf_size = MAX_CHARS_PER_FRAME;
   bits->owner = 1;

   speex_bits_reset(bits);
}
/* Call site: pass the narrowband mode (speex_nb_mode) to speex_encoder_init()
 * and keep the returned value in enc_state. */
enc_state = speex_encoder_init(&speex_nb_mode);
typedef struct SpeexNBMode { (modes.h-- 117)
int frameSize; /* *< Size of frames used for encoding */
int subframeSize; /* *< Size of sub-frames used for encoding */
int lpcSize; /* *< Order of LPC filter */
int pitchStart; /* *< Smallest pitch value allowed */
int pitchEnd; /* *< Largest pitch value allowed */
spx_word16_t gamma1; /* *< Perceptual filter parameter #1 */
spx_word16_t gamma2; /* *< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /* *< Noise floor for LPC analysis */
const SpeexSubmode *submodes[NB_SUBMODES]; /* *< Sub-mode data for the mode */
int defaultSubmode; /* *< Default sub-mode to use when encoding */
int quality_map[ 11]; /* *< Mode corresponding to each quality setting */
} SpeexNBMode;
/* Default mode for narrowband */ (modes.c --- 320)
static const SpeexNBMode nb_mode = {
160, /* frameSize */
40, /* subframeSize */
10, /* lpcSize */
17, /* pitchStart */
144, /* pitchEnd */
#ifdef FIXED_POINT
29491, 19661, /* gamma1, gamma2 */
#else
0.9, 0.6, /* gamma1, gamma2 */
#endif
QCONST16(. 0002, 15), /* lpc_floor */
{NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7,
&nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
5,
{ 1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}
};
/* Default mode for narrowband */ (modes.c --- 340)
EXPORT const SpeexMode speex_nb_mode = {
&nb_mode,
nb_mode_query,
" narrowband ",
0,
4,
&nb_encoder_init,
&nb_encoder_destroy, (nb_celp.c)
&nb_encode,
&nb_decoder_init,
&nb_decoder_destroy,
&nb_decode,
&nb_encoder_ctl,
&nb_decoder_ctl,
};
/* * Struct defining a Speex mode */ (speex.h-- 248)
typedef struct SpeexMode {
/* * Pointer to the low-level mode data */
const void *mode;
/* * Pointer to the mode query function */
mode_query_func query;
/* * The name of the mode (you should not rely on this to identify the mode) */
const char *modeName;
/* *ID of the mode */
int modeID;
/* *Version number of the bitstream (incremented every time we break
bitstream compatibility */
int bitstream_version;
/* * Pointer to encoder initialization function */
encoder_init_func enc_init;
/* * Pointer to encoder destruction function */
encoder_destroy_func enc_destroy;
/* * Pointer to frame encoding function */
encode_func enc;
/* * Pointer to decoder initialization function */
decoder_init_func dec_init;
/* * Pointer to decoder destruction function */
decoder_destroy_func dec_destroy;
/* * Pointer to frame decoding function */
decode_func dec;
/* * ioctl-like requests for encoder */
encoder_ctl_func enc_ctl;
/* * ioctl-like requests for decoder */
decoder_ctl_func dec_ctl;
} SpeexMode;
/* *Structure representing the full state of the narrowband encoder */(nb_celp.h-- 49)
typedef struct EncState {
const SpeexMode *mode; /* *< Mode corresponding to the state */
int first; /* *< Is this the first frame? */
int frameSize; /* *< Size of frames */
int subframeSize; /* *< Size of sub-frames */
int nbSubframes; /* *< Number of sub-frames */
int windowSize; /* *< Analysis (LPC) window length */
int lpcSize; /* *< LPC order */
int min_pitch; /* *< Minimum pitch value allowed */
int max_pitch; /* *< Maximum pitch value allowed */
spx_word32_t cumul_gain; /* *< Product of previously used pitch gains (Q10) */
int bounded_pitch; /* *< Next frame should not rely on previous frames for pitch */
int ol_pitch; /* *< Open-loop pitch */
int ol_voiced; /* *< Open-loop voiced/non-voiced decision */
int *pitch;
#ifdef VORBIS_PSYCHO
VorbisPsy *psy;
float *psy_window;
float *curve;
float *old_curve;
#endif
spx_word16_t gamma1; /* *< Perceptual filter: A(z/gamma1) */
spx_word16_t gamma2; /* *< Perceptual filter: A(z/gamma2) */
spx_word16_t lpc_floor; /* *< Noise floor multiplier for A[0] in LPC analysis */
char *stack; /* *< Pseudo-stack allocation for temporary memory */
spx_word16_t *winBuf; /* *< Input buffer (original signal) */
spx_word16_t *excBuf; /* *< Excitation buffer */
spx_word16_t *exc; /* *< Start of excitation frame */
spx_word16_t *swBuf; /* *< Weighted signal buffer */
spx_word16_t *sw; /* *< Start of weighted signal frame */
const spx_word16_t *window; /* *< Temporary (Hanning) window */
const spx_word16_t *lagWindow; /* *< Window applied to auto-correlation */
spx_lsp_t *old_lsp; /* *< LSPs for previous frame */
spx_lsp_t *old_qlsp; /* *< Quantized LSPs for previous frame */
spx_mem_t *mem_sp; /* *< Filter memory for signal synthesis */
spx_mem_t *mem_sw; /* *< Filter memory for perceptually-weighted signal */
spx_mem_t *mem_sw_whole; /* *< Filter memory for perceptually-weighted signal (whole frame) */
spx_mem_t *mem_exc; /* *< Filter memory for excitation (whole frame) */
spx_mem_t *mem_exc2; /* *< Filter memory for excitation (whole frame) */
spx_mem_t mem_hp[ 2]; /* *< High-pass filter memory */
spx_word32_t *pi_gain; /* *< Gain of LPC filter at theta=pi (fe/2) */
spx_word16_t *innov_rms_save; /* *< If non-NULL, innovation RMS is copied here */
#ifndef DISABLE_VBR
VBRState *vbr; /* *< State of the VBR data */
float vbr_quality; /* *< Quality setting for VBR encoding */
float relative_quality; /* *< Relative quality that will be needed by VBR */
spx_int32_t vbr_enabled; /* *< 1 for enabling VBR, 0 otherwise */
spx_int32_t vbr_max; /* *< Max bit-rate allowed in VBR mode */
int vad_enabled; /* *< 1 for enabling VAD, 0 otherwise */
int dtx_enabled; /* *< 1 for enabling DTX, 0 otherwise */
int dtx_count; /* *< Number of consecutive DTX frames */
spx_int32_t abr_enabled; /* *< ABR setting (in bps), 0 if off */
float abr_drift;
float abr_drift2;
float abr_count;
#endif /* #ifndef DISABLE_VBR */
int complexity; /* *< Complexity setting (0-10 from least complex to most complex) */
spx_int32_t sampling_rate;
int plc_tuning;
int encode_submode;
const SpeexSubmode * const *submodes; /* *< Sub-mode data */
int submodeID; /* *< Activated sub-mode */
int submodeSelect; /* *< Mode chosen by the user (may differ from submodeID if VAD is on) */
int isWideband; /* *< Is this used as part of the embedded wideband codec */
int highpass_enabled; /* *< Is the input filter enabled */
} EncState;
/* Call site: issue a SPEEX_SET_QUALITY request with the address of `quality`.
 * The second line appears to be the statement executed for that request: it
 * reads quality_map[quality] from the mode data and stores the value in both
 * submodeSelect and submodeID.
 * NOTE(review): the cast is to SpeexSBMode (wideband) although the encoder in
 * these notes is created from speex_nb_mode -- confirm which handler is meant. */
speex_encoder_ctl(enc_state,SPEEX_SET_QUALITY,&quality);(sb_celp.c-- 1196)
st->submodeSelect = st->submodeID = (( const SpeexSBMode*)(st->mode->mode))->quality_map[quality];
quality_map[quality] 默认值: {1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}
st->submodeSelect: /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
st->submodeID: /**< Activated sub-mode */
/** Struct defining the encoding/decoding mode for SB-CELP (wideband) */
typedef struct SpeexSBMode {
const SpeexMode *nb_mode; /**< Embedded narrowband mode */
int frameSize; /**< Size of frames used for encoding */
int subframeSize; /**< Size of sub-frames used for encoding */
int lpcSize; /**< Order of LPC filter */
spx_word16_t gamma1; /**< Perceptual filter parameter #1 */
spx_word16_t gamma2; /**< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */
spx_word16_t folding_gain;
const SpeexSubmode *submodes[SB_SUBMODES]; /**< Sub-mode data for the mode */
int defaultSubmode; /**< Default sub-mode to use when encoding */
int low_quality_map[ 11]; /**< Mode corresponding to each quality setting */
int quality_map[ 11]; /**< Mode corresponding to each quality setting */
#ifndef DISABLE_VBR
const float (*vbr_thresh)[ 11];
#endif
int nb_modes;
} SpeexSBMode;
quality与bit-rate对照表:
mode  quality  bit-rate  mflops  quality/description
1     0        2,150     6       Vocoder (mostly for comfort noise)
2     2        5,950     9       Very noticeable artifacts/noise, good intelligibility
3     3-4      8,000     10      Artifacts/noise sometimes noticeable
4     5-6      11,000    14      Artifacts usually noticeable only with headphones
5     7-8      15,000    11      Need good headphones to tell the difference
6     9        18,200    17.5    Hard to tell the difference even with good headphones
7     10       24,600    14.5    Completely transparent for voice, good quality music
8     1        3,950     10.5
/* Call site: encode the samples in input_frame into `bits` using enc_state. */
speex_encode_int(enc_state, input_frame, &bits);
/**
 * Encode one frame supplied as integer samples.
 *
 * Asks the encoder for its frame size (SPEEX_GET_FRAME_SIZE), converts that
 * many samples from `in` into a local float array, and passes the array to
 * the `enc` callback of the mode whose pointer is read from the start of
 * `state`.
 *
 * @param state Encoder state; read here as a pointer to a SpeexMode pointer.
 * @param in    Input samples; the first frame-size entries are read.
 * @param bits  Bit-stream handed through to the mode's encoder.
 * @return The value returned by the mode's `enc` callback.
 *
 * NOTE(review): the frame size is not checked against MAX_IN_SAMPLES here;
 * presumably every mode's frame size fits -- confirm.
 */
EXPORT int speex_encode_int( void *state, spx_int16_t * in, SpeexBits *bits)
{
   float samples[MAX_IN_SAMPLES];
   spx_int32_t frame_len;
   int idx = 0;

   speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &frame_len);

   /* Widen each integer sample to float */
   while (idx < frame_len)
   {
      samples[idx] = in[idx];
      idx++;
   }

   /* For the narrowband mode the enc callback is nb_encode (nb_celp.c, line 252) */
   return (*((SpeexMode**)state))->enc(state, samples, bits);
}
/* Call site: copy at most MAX_NB_BYTES of the packed stream into byte_ptr;
 * nbBytes receives the value returned by speex_bits_write(). */
nbBytes = speex_bits_write(&bits, byte_ptr, MAX_NB_BYTES);
/**
 * Copy the contents of a bit-stream into a caller-supplied buffer
 * (bits.c, line 188).
 *
 * A terminator is inserted first; bitPtr, charPtr and nbBits are saved before
 * and restored after that call, so those three fields end up unchanged. The
 * number of chars copied is max_nbytes/BYTES_PER_CHAR, capped at the number
 * of chars covered by nbBits bits.
 *
 * @param bits       Stream to read from.
 * @param chars      Destination buffer.
 * @param max_nbytes Capacity of the destination, in bytes.
 * @return Number of bytes written (chars copied times BYTES_PER_CHAR).
 */
EXPORT int speex_bits_write(SpeexBits *bits, char *chars, int max_nbytes)
{
   int i;
   int max_nchars = max_nbytes/BYTES_PER_CHAR;
   int used_nchars;
   int charPtr, bitPtr, nbBits;

   /* Insert terminator, but save the data so we can put it back after */
   bitPtr = bits->bitPtr;
   charPtr = bits->charPtr;
   nbBits = bits->nbBits;
   speex_bits_insert_terminator(bits);
   bits->bitPtr = bitPtr;
   bits->charPtr = charPtr;
   bits->nbBits = nbBits;

   /* Chars needed for nbBits bits; rounds up, assuming
      BITS_PER_CHAR == 1<<LOG2_BITS_PER_CHAR */
   used_nchars = (bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR;
   if (max_nchars > used_nchars)
      max_nchars = used_nchars;

   for (i=0;i<max_nchars;i++)
      chars[i] = HTOLS(bits->chars[i]);

   return max_nchars*BYTES_PER_CHAR;
}
/** Bit-packing data structure representing (part of) a bit-stream. */
typedef struct SpeexBits {
char *chars; /**< "raw" data */
int nbBits; /**< Total number of bits stored in the stream */
int charPtr; /**< Position of the byte "cursor" */
int bitPtr; /**< Position of the bit "cursor" within the current char */
int owner; /**< Does the struct "own" the "raw" buffer (member "chars") */
int overflow; /**< Set to one if we try to read past the valid data */
int buf_size; /**< Allocated size for buffer */
int reserved1; /**< Reserved for future use */
void *reserved2; /**< Reserved for future use */
} SpeexBits;
/**
 * Initialise a SpeexBits with a newly allocated buffer (bits.c, line 48).
 *
 * Requests MAX_CHARS_PER_FRAME chars from speex_alloc(). When the allocation
 * fails the function returns at once: bits->chars is NULL and no other field
 * has been written. Otherwise it records the buffer size, marks the struct as
 * owner of the buffer and calls speex_bits_reset().
 *
 * @param bits Structure to initialise.
 */
EXPORT void speex_bits_init(SpeexBits *bits)
{
   bits->chars = (char*)speex_alloc(MAX_CHARS_PER_FRAME);
   if (!bits->chars)
      return;

   bits->buf_size = MAX_CHARS_PER_FRAME;
   bits->owner = 1;

   speex_bits_reset(bits);
}
/* Call site: pass the narrowband mode (speex_nb_mode) to speex_encoder_init()
 * and keep the returned value in enc_state. */
enc_state = speex_encoder_init(&speex_nb_mode);
typedef struct SpeexNBMode { (modes.h-- 117)
int frameSize; /* *< Size of frames used for encoding */
int subframeSize; /* *< Size of sub-frames used for encoding */
int lpcSize; /* *< Order of LPC filter */
int pitchStart; /* *< Smallest pitch value allowed */
int pitchEnd; /* *< Largest pitch value allowed */
spx_word16_t gamma1; /* *< Perceptual filter parameter #1 */
spx_word16_t gamma2; /* *< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /* *< Noise floor for LPC analysis */
const SpeexSubmode *submodes[NB_SUBMODES]; /* *< Sub-mode data for the mode */
int defaultSubmode; /* *< Default sub-mode to use when encoding */
int quality_map[ 11]; /* *< Mode corresponding to each quality setting */
} SpeexNBMode;
/* Default mode for narrowband */ (modes.c --- 320)
static const SpeexNBMode nb_mode = {
160, /* frameSize */
40, /* subframeSize */
10, /* lpcSize */
17, /* pitchStart */
144, /* pitchEnd */
#ifdef FIXED_POINT
29491, 19661, /* gamma1, gamma2 */
#else
0.9, 0.6, /* gamma1, gamma2 */
#endif
QCONST16(. 0002, 15), /* lpc_floor */
{NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7,
&nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
5,
{ 1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}
};
/* Default mode for narrowband */ (modes.c --- 340)
EXPORT const SpeexMode speex_nb_mode = {
&nb_mode,
nb_mode_query,
" narrowband ",
0,
4,
&nb_encoder_init,
&nb_encoder_destroy, (nb_celp.c)
&nb_encode,
&nb_decoder_init,
&nb_decoder_destroy,
&nb_decode,
&nb_encoder_ctl,
&nb_decoder_ctl,
};
/* * Struct defining a Speex mode */ (speex.h-- 248)
typedef struct SpeexMode {
/* * Pointer to the low-level mode data */
const void *mode;
/* * Pointer to the mode query function */
mode_query_func query;
/* * The name of the mode (you should not rely on this to identify the mode) */
const char *modeName;
/* *ID of the mode */
int modeID;
/* *Version number of the bitstream (incremented every time we break
bitstream compatibility */
int bitstream_version;
/* * Pointer to encoder initialization function */
encoder_init_func enc_init;
/* * Pointer to encoder destruction function */
encoder_destroy_func enc_destroy;
/* * Pointer to frame encoding function */
encode_func enc;
/* * Pointer to decoder initialization function */
decoder_init_func dec_init;
/* * Pointer to decoder destruction function */
decoder_destroy_func dec_destroy;
/* * Pointer to frame decoding function */
decode_func dec;
/* * ioctl-like requests for encoder */
encoder_ctl_func enc_ctl;
/* * ioctl-like requests for decoder */
decoder_ctl_func dec_ctl;
} SpeexMode;
/* *Structure representing the full state of the narrowband encoder */(nb_celp.h-- 49)
typedef struct EncState {
const SpeexMode *mode; /* *< Mode corresponding to the state */
int first; /* *< Is this the first frame? */
int frameSize; /* *< Size of frames */
int subframeSize; /* *< Size of sub-frames */
int nbSubframes; /* *< Number of sub-frames */
int windowSize; /* *< Analysis (LPC) window length */
int lpcSize; /* *< LPC order */
int min_pitch; /* *< Minimum pitch value allowed */
int max_pitch; /* *< Maximum pitch value allowed */
spx_word32_t cumul_gain; /* *< Product of previously used pitch gains (Q10) */
int bounded_pitch; /* *< Next frame should not rely on previous frames for pitch */
int ol_pitch; /* *< Open-loop pitch */
int ol_voiced; /* *< Open-loop voiced/non-voiced decision */
int *pitch;
#ifdef VORBIS_PSYCHO
VorbisPsy *psy;
float *psy_window;
float *curve;
float *old_curve;
#endif
spx_word16_t gamma1; /* *< Perceptual filter: A(z/gamma1) */
spx_word16_t gamma2; /* *< Perceptual filter: A(z/gamma2) */
spx_word16_t lpc_floor; /* *< Noise floor multiplier for A[0] in LPC analysis */
char *stack; /* *< Pseudo-stack allocation for temporary memory */
spx_word16_t *winBuf; /* *< Input buffer (original signal) */
spx_word16_t *excBuf; /* *< Excitation buffer */
spx_word16_t *exc; /* *< Start of excitation frame */
spx_word16_t *swBuf; /* *< Weighted signal buffer */
spx_word16_t *sw; /* *< Start of weighted signal frame */
const spx_word16_t *window; /* *< Temporary (Hanning) window */
const spx_word16_t *lagWindow; /* *< Window applied to auto-correlation */
spx_lsp_t *old_lsp; /* *< LSPs for previous frame */
spx_lsp_t *old_qlsp; /* *< Quantized LSPs for previous frame */
spx_mem_t *mem_sp; /* *< Filter memory for signal synthesis */
spx_mem_t *mem_sw; /* *< Filter memory for perceptually-weighted signal */
spx_mem_t *mem_sw_whole; /* *< Filter memory for perceptually-weighted signal (whole frame) */
spx_mem_t *mem_exc; /* *< Filter memory for excitation (whole frame) */
spx_mem_t *mem_exc2; /* *< Filter memory for excitation (whole frame) */
spx_mem_t mem_hp[ 2]; /* *< High-pass filter memory */
spx_word32_t *pi_gain; /* *< Gain of LPC filter at theta=pi (fe/2) */
spx_word16_t *innov_rms_save; /* *< If non-NULL, innovation RMS is copied here */
#ifndef DISABLE_VBR
VBRState *vbr; /* *< State of the VBR data */
float vbr_quality; /* *< Quality setting for VBR encoding */
float relative_quality; /* *< Relative quality that will be needed by VBR */
spx_int32_t vbr_enabled; /* *< 1 for enabling VBR, 0 otherwise */
spx_int32_t vbr_max; /* *< Max bit-rate allowed in VBR mode */
int vad_enabled; /* *< 1 for enabling VAD, 0 otherwise */
int dtx_enabled; /* *< 1 for enabling DTX, 0 otherwise */
int dtx_count; /* *< Number of consecutive DTX frames */
spx_int32_t abr_enabled; /* *< ABR setting (in bps), 0 if off */
float abr_drift;
float abr_drift2;
float abr_count;
#endif /* #ifndef DISABLE_VBR */
int complexity; /* *< Complexity setting (0-10 from least complex to most complex) */
spx_int32_t sampling_rate;
int plc_tuning;
int encode_submode;
const SpeexSubmode * const *submodes; /* *< Sub-mode data */
int submodeID; /* *< Activated sub-mode */
int submodeSelect; /* *< Mode chosen by the user (may differ from submodeID if VAD is on) */
int isWideband; /* *< Is this used as part of the embedded wideband codec */
int highpass_enabled; /* *< Is the input filter enabled */
} EncState;
/* Call site: issue a SPEEX_SET_QUALITY request with the address of `quality`.
 * The second line appears to be the statement executed for that request: it
 * reads quality_map[quality] from the mode data and stores the value in both
 * submodeSelect and submodeID.
 * NOTE(review): the cast is to SpeexSBMode (wideband) although the encoder in
 * these notes is created from speex_nb_mode -- confirm which handler is meant. */
speex_encoder_ctl(enc_state,SPEEX_SET_QUALITY,&quality);(sb_celp.c-- 1196)
st->submodeSelect = st->submodeID = (( const SpeexSBMode*)(st->mode->mode))->quality_map[quality];
quality_map[quality] 默认值: {1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}
st->submodeSelect: /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
st->submodeID: /**< Activated sub-mode */
/** Struct defining the encoding/decoding mode for SB-CELP (wideband) */
typedef struct SpeexSBMode {
const SpeexMode *nb_mode; /**< Embedded narrowband mode */
int frameSize; /**< Size of frames used for encoding */
int subframeSize; /**< Size of sub-frames used for encoding */
int lpcSize; /**< Order of LPC filter */
spx_word16_t gamma1; /**< Perceptual filter parameter #1 */
spx_word16_t gamma2; /**< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */
spx_word16_t folding_gain;
const SpeexSubmode *submodes[SB_SUBMODES]; /**< Sub-mode data for the mode */
int defaultSubmode; /**< Default sub-mode to use when encoding */
int low_quality_map[ 11]; /**< Mode corresponding to each quality setting */
int quality_map[ 11]; /**< Mode corresponding to each quality setting */
#ifndef DISABLE_VBR
const float (*vbr_thresh)[ 11];
#endif
int nb_modes;
} SpeexSBMode;
quality与bit-rate对照表:
mode  quality  bit-rate  mflops  quality/description
1     0        2,150     6       Vocoder (mostly for comfort noise)
2     2        5,950     9       Very noticeable artifacts/noise, good intelligibility
3     3-4      8,000     10      Artifacts/noise sometimes noticeable
4     5-6      11,000    14      Artifacts usually noticeable only with headphones
5     7-8      15,000    11      Need good headphones to tell the difference
6     9        18,200    17.5    Hard to tell the difference even with good headphones
7     10       24,600    14.5    Completely transparent for voice, good quality music
8     1        3,950     10.5
/* Call site: encode the samples in input_frame into `bits` using enc_state. */
speex_encode_int(enc_state, input_frame, &bits);
/**
 * Encode one frame supplied as integer samples.
 *
 * Asks the encoder for its frame size (SPEEX_GET_FRAME_SIZE), converts that
 * many samples from `in` into a local float array, and passes the array to
 * the `enc` callback of the mode whose pointer is read from the start of
 * `state`.
 *
 * @param state Encoder state; read here as a pointer to a SpeexMode pointer.
 * @param in    Input samples; the first frame-size entries are read.
 * @param bits  Bit-stream handed through to the mode's encoder.
 * @return The value returned by the mode's `enc` callback.
 *
 * NOTE(review): the frame size is not checked against MAX_IN_SAMPLES here;
 * presumably every mode's frame size fits -- confirm.
 */
EXPORT int speex_encode_int( void *state, spx_int16_t * in, SpeexBits *bits)
{
   float samples[MAX_IN_SAMPLES];
   spx_int32_t frame_len;
   int idx = 0;

   speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &frame_len);

   /* Widen each integer sample to float */
   while (idx < frame_len)
   {
      samples[idx] = in[idx];
      idx++;
   }

   /* For the narrowband mode the enc callback is nb_encode (nb_celp.c, line 252) */
   return (*((SpeexMode**)state))->enc(state, samples, bits);
}
/* Call site: copy at most MAX_NB_BYTES of the packed stream into byte_ptr;
 * nbBytes receives the value returned by speex_bits_write(). */
nbBytes = speex_bits_write(&bits, byte_ptr, MAX_NB_BYTES);
/**
 * Copy the contents of a bit-stream into a caller-supplied buffer
 * (bits.c, line 188).
 *
 * A terminator is inserted first; bitPtr, charPtr and nbBits are saved before
 * and restored after that call, so those three fields end up unchanged. The
 * number of chars copied is max_nbytes/BYTES_PER_CHAR, capped at the number
 * of chars covered by nbBits bits.
 *
 * @param bits       Stream to read from.
 * @param chars      Destination buffer.
 * @param max_nbytes Capacity of the destination, in bytes.
 * @return Number of bytes written (chars copied times BYTES_PER_CHAR).
 */
EXPORT int speex_bits_write(SpeexBits *bits, char *chars, int max_nbytes)
{
   int i;
   int max_nchars = max_nbytes/BYTES_PER_CHAR;
   int used_nchars;
   int charPtr, bitPtr, nbBits;

   /* Insert terminator, but save the data so we can put it back after */
   bitPtr = bits->bitPtr;
   charPtr = bits->charPtr;
   nbBits = bits->nbBits;
   speex_bits_insert_terminator(bits);
   bits->bitPtr = bitPtr;
   bits->charPtr = charPtr;
   bits->nbBits = nbBits;

   /* Chars needed for nbBits bits; rounds up, assuming
      BITS_PER_CHAR == 1<<LOG2_BITS_PER_CHAR */
   used_nchars = (bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR;
   if (max_nchars > used_nchars)
      max_nchars = used_nchars;

   for (i=0;i<max_nchars;i++)
      chars[i] = HTOLS(bits->chars[i]);

   return max_nchars*BYTES_PER_CHAR;
}