/*---------------------------------------------------------------------------*\
FILE........: codec2.c
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
Codec2 fully quantised encoder and decoder functions. If you want to use
codec2, the codec2_xxx functions are for you.
\*---------------------------------------------------------------------------*/
/*
Copyright (C) 2010 David Rowe
All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License version 2.1, as
published by the Free Software Foundation. This program is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <complex>
#include "nlp.h"
#include "lpc.h"
#include "quantise.h"
#include "codec2_api.h"
#include "codec2_internal.h"
#define HPF_BETA 0.125
#define BPF_N 101
CKissFFT kiss;
/*---------------------------------------------------------------------------*\
FUNCTION HEADERS
\*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*\
FUNCTIONS
\*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*\
FUNCTION....: CCodec2::CCodec2
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
Create and initialise an instance of the codec. One set of states is
sufficient for a full duplex codec (i.e. an encoder and decoder).
You don't need separate states for encoders and decoders. See
c2enc.c and c2dec.c for examples.
\*---------------------------------------------------------------------------*/
CCodec2::CCodec2(bool is_3200)
{
c2.mode = is_3200 ? 3200 : 1600;
/* store constants in a few places for convenience */
c2.c2const = c2const_create(8000, N_S);
c2.Fs = c2.c2const.Fs;
int n_samp = c2.n_samp = c2.c2const.n_samp;
int m_pitch = c2.m_pitch = c2.c2const.m_pitch;
c2.Pn.resize(2*n_samp);
c2.Sn_.resize(2*n_samp);
c2.w.resize(m_pitch);
c2.Sn.resize(m_pitch);
for(int i=0; i<m_pitch; i++)
c2.Sn[i] = 1.0;

/* ... */

std::complex<float> Aw[FFT_ENC];
/* only need to zero these out due to (unused) snr calculation */
for(i=0; i<2; i++)
for(j=1; j<=MAX_AMP; j++)
model[i].A[j] = 0.0;
/* unpack bits from channel ------------------------------------*/
/* this will partially fill the model params for the 2 x 10ms
frames */
model[0].voiced = qt.unpack(bits, &nbit, 1);
model[1].voiced = qt.unpack(bits, &nbit, 1);
Wo_index = qt.unpack(bits, &nbit, WO_BITS);
model[1].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS);
model[1].L = PI/model[1].Wo;
e_index = qt.unpack(bits, &nbit, E_BITS);
e[1] = qt.decode_energy(e_index, E_BITS);
for(i=0; i<LSP_SCALAR_INDEXES; i++)
lsp_indexes[i] = qt.unpack(bits, &nbit, qt.lsp_bits(i));

/* ... */

std::complex<float> Aw[FFT_ENC];
/* only need to zero these out due to (unused) snr calculation */
for(i=0; i<4; i++)
for(j=1; j<=MAX_AMP; j++)
model[i].A[j] = 0.0;
/* unpack bits from channel ------------------------------------*/
/* this will partially fill the model params for the 4 x 10ms
frames */
model[0].voiced = qt.unpack(bits, &nbit, 1);
model[1].voiced = qt.unpack(bits, &nbit, 1);
Wo_index = qt.unpack(bits, &nbit, WO_BITS);
model[1].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS);
model[1].L = PI/model[1].Wo;
e_index = qt.unpack(bits, &nbit, E_BITS);
e[1] = qt.decode_energy(e_index, E_BITS);
model[2].voiced = qt.unpack(bits, &nbit, 1);
model[3].voiced = qt.unpack(bits, &nbit, 1);
Wo_index = qt.unpack(bits, &nbit, WO_BITS);
model[3].Wo = qt.decode_Wo(&c2.c2const, Wo_index, WO_BITS);
model[3].L = PI/model[3].Wo;
e_index = qt.unpack(bits, &nbit, E_BITS);
e[3] = qt.decode_energy(e_index, E_BITS);
for(i=0; i<LSP_SCALAR_INDEXES; i++)
lsp_indexes[i] = qt.unpack(bits, &nbit, qt.lsp_bits(i));

/* ... */

void CCodec2::synthesise_one_frame(short speech[], MODEL *model, std::complex<float> Aw[], float gain)
{
int i;
/* LPC based phase synthesis */
std::complex<float> H[MAX_AMP+1];
sample_phase(model, H, Aw);
phase_synth_zero_order(c2.n_samp, model, &c2.ex_phase, H);
postfilter(model, &c2.bg_est);
synthesise(c2.n_samp, &(c2.fftr_inv_cfg), c2.Sn_.data(), model, c2.Pn.data(), 1);
for(i=0; i<c2.n_samp; i++)
c2.Sn_[i] *= gain;

ear_protection(c2.Sn_.data(), c2.n_samp);

for(i=0; i<c2.n_samp; i++)
{
if (c2.Sn_[i] > 32767.0)
speech[i] = 32767;
else if (c2.Sn_[i] < -32767.0)
speech[i] = -32767;
else
speech[i] = c2.Sn_[i];
}
}
/*---------------------------------------------------------------------------*\
FUNCTION....: analyse_one_frame()
AUTHOR......: David Rowe
DATE CREATED: 23/8/2010
Extract sinusoidal model parameters from 80 speech samples (10ms of
speech).
\*---------------------------------------------------------------------------*/
void CCodec2::analyse_one_frame(MODEL *model, const short *speech)
{
std::complex<float> Sw[FFT_ENC];
float pitch;
int i;
int n_samp = c2.n_samp;
int m_pitch = c2.m_pitch;
/* Read input speech */
for(i=0; i<m_pitch-n_samp; i++)
c2.Sn[i] = c2.Sn[i+n_samp];
for(i=0; i<n_samp; i++)
c2.Sn[i+m_pitch-n_samp] = speech[i];

/* ... DFT of speech and NLP pitch estimation ... */

model->Wo = TWO_PI/pitch;
model->L = PI/model->Wo;
/* estimate model parameters */
two_stage_pitch_refinement(&c2.c2const, model, Sw);
/* estimate phases when doing ML experiments */
estimate_amplitudes(model, Sw, 0);
est_voicing_mbe(&c2.c2const, model, Sw, c2.W);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: ear_protection()
AUTHOR......: David Rowe
DATE CREATED: Nov 7 2012
Limits output level to protect ears when there are bit errors or the input
is overdriven. This doesn't correct or mask bit errors, just reduces the
worst of their damage.
\*---------------------------------------------------------------------------*/
void CCodec2::ear_protection(float in_out[], int n)
{
float max_sample, over, gain;
int i;
/* find maximum sample in frame */
max_sample = 0.0;
for(i=0; i<n; i++)
if (in_out[i] > max_sample)
max_sample = in_out[i];
/* determine how far above set point */
over = max_sample/30000.0;
/* If we are x dB over set point we reduce level by 2x dB, this
attenuates major excursions in amplitude (likely to be caused
by bit errors) more than smaller ones */
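/* e.g. if the peak is x dB above the 30000 set point, over = 10^(x/20)
and gain = 1/over^2 = 10^(-2x/20), i.e. 2x dB of attenuation */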
if (over > 1.0)
{
gain = 1.0/(over*over);
for(i=0; i<n; i++)
in_out[i] *= gain;
}
}

void CCodec2::sample_phase(MODEL *model,
std::complex<float> H[],
std::complex<float> A[] /* LPC analysis filter in freq domain */
)
{
int m, b;
float r;
r = TWO_PI/(FFT_ENC);
/* Sample phase at harmonics */
for(m=1; m<=model->L; m++)
{
b = (int)(m*model->Wo/r + 0.5);
H[m] = std::conj(A[b]);
}
}
/*---------------------------------------------------------------------------*\
phase_synth_zero_order()
Synthesises phases based on SNR and a rule based approach. No phase
parameters are required apart from the SNR (which can be reduced to a
1 bit V/UV decision per frame).
The phase of each harmonic is modelled as the phase of a synthesis
filter excited by an impulse. In many Codec 2 modes the synthesis
filter is a LPC filter. Unlike the first order model the position
of the impulse is not transmitted, so we create an excitation pulse
train using a rule based approach.
Consider a pulse train with a pulse starting time n=0, with pulses
repeated at a rate of Wo, the fundamental frequency. A pulse train
in the time domain is equivalent to harmonics in the frequency
domain. We can make an excitation pulse train using a sum of
sinusoids:
for(m=1; m<=L; m++)
ex[n] = cos(m*Wo*n)
Note: the Octave script ../octave/phase.m is an example of this if
you would like to try making a pulse train.
The phase of each excitation harmonic is:
arg(E[m]) = mWo
where E[m] are the complex excitation (freq domain) samples,
arg(x) just returns the phase of a complex sample x.
As we don't transmit the pulse position for this model, we need to
synthesise it. Now the excitation pulses occur at a rate of Wo.
This means the phase of the first harmonic advances by N_SAMP samples
over a synthesis frame of N_SAMP samples. For example if Wo is pi/20
(200 Hz), then over a 10ms frame (N_SAMP=80 samples), the phase of the
first harmonic would advance (pi/20)*80 = 4*pi or two complete
cycles.
We generate the excitation phase of the fundamental (first
harmonic):
arg[E[1]] = Wo*N_SAMP;
We then relate the phase of the m-th excitation harmonic to the
phase of the fundamental as:
arg(E[m]) = m*arg(E[1])
This E[m] then gets passed through the LPC synthesis filter to
determine the final harmonic phase.
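In code form, each synthesis frame below does:
ex_phase += Wo*N_SAMP (reduced modulo 2*pi)
Ex[m] = exp(j*m*ex_phase)
A_[m] = H[m]*Ex[m], where H[m] is the LPC synthesis filter sampled
at harmonic m
model->phi[m] = arg(A_[m])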
Comparing to speech synthesised using original phases:
- Through headphones speech synthesised with this model is not as
good. Through a loudspeaker it is very close to original phases.
- If there are voicing errors, the speech can sound clicky or
staticy. If V speech is mistakenly declared UV, this model tends to
synthesise impulses or clicks, as there is usually very little shift or
dispersion through the LPC synthesis filter.
- When combined with LPC amplitude modelling there is an additional
drop in quality. I am not sure why; my theory is that inter-formant
energy is raised, making any phase errors more obvious.
NOTES:
1/ This synthesis model is effectively the same as a simple LPC-10
vocoder, and yet sounds much better. Why? Conventional wisdom
(AMBE, MELP) says mixed voicing is required for high quality
speech.
2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
also from MIT) first described this zero phase model, I need to look
up the paper.
3/ Note that this approach could cause some discontinuities in
the phase at the edge of synthesis frames, as no attempt is made
to make sure that the phase tracks are continuous (the excitation
phases are continuous, but not the final phases after filtering
by the LPC spectra). Technically this is a bad thing. However
this may actually be a good thing, disturbing the phase tracks a
bit. More research needed, e.g. test a synthesis model that adds
a small delta-W to make phase tracks line up for voiced
harmonics.
\*---------------------------------------------------------------------------*/
void CCodec2::phase_synth_zero_order(
int n_samp,
MODEL *model,
float *ex_phase, /* excitation phase of fundamental */
std::complex<float> H[] /* L synthesis filter freq domain samples */
)
{
int m;
float new_phi;
std::complex<float> Ex[MAX_AMP+1]; /* excitation samples */
std::complex<float> A_[MAX_AMP+1]; /* synthesised harmonic samples */
/*
Update excitation fundamental phase track, this sets the position
of each pitch pulse during voiced speech. After much experiment
I found that using just this frame's Wo improved quality for UV
sounds compared to interpolating two frames Wo like this:
ex_phase[0] += (*prev_Wo+model->Wo)*N_SAMP/2;
*/
ex_phase[0] += (model->Wo)*n_samp;
ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5);
for(m=1; m<=model->L; m++)
{
/* generate excitation */
if (model->voiced)
{
Ex[m] = std::polar(1.0f, ex_phase[0] * m);
}
else
{
/* When a few samples were tested I found that LPC filter
phase is not needed in the unvoiced case, but no harm in
keeping it.
*/
float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
Ex[m] = std::polar(1.0f, phi);
}
/* filter using LPC filter */
A_[m].real(H[m].real() * Ex[m].real() - H[m].imag() * Ex[m].imag());
A_[m].imag(H[m].imag() * Ex[m].real() + H[m].real() * Ex[m].imag());
/* modify sinusoidal phase */
new_phi = atan2f(A_[m].imag(), A_[m].real()+1E-12);
model->phi[m] = new_phi;
}
}
/*---------------------------------------------------------------------------*\
postfilter()
The post filter is designed to help with speech corrupted by
background noise. The zero phase model tends to make speech with
background noise sound "clicky". With high levels of background
noise the low level inter-formant parts of the spectrum will contain
noise rather than speech harmonics, so modelling them as voiced
(i.e. a continuous, non-random phase track) is inaccurate.
Some codecs (like MBE) have a mixed voicing model that breaks the
spectrum into voiced and unvoiced regions. Several bits/frame
(5-12) are required to transmit the frequency selective voicing
information. Mixed excitation also requires accurate voicing
estimation (parameter estimators always break occasionally under
exceptional conditions).
In our case we use a post filter approach which requires no
additional bits to be transmitted. The decoder measures the average
level of the background noise during unvoiced frames. If a harmonic
is less than this level it is made unvoiced by randomising its
phases.
This idea is rather experimental. Some potential problems that may
happen:
1/ If someone says "aaaaaaaahhhhhhhhh" will the background estimator track
up to the speech level? This would be a bad thing.
2/ If background noise suddenly disappears from the source speech, does the
estimate drop quickly? What if noise suddenly re-appears?
3/ Background noise with a non-flat spectrum. The current algorithm just
considers the spectrum as a whole, but it could be broken up into
bands, each with its own estimator.
4/ Males and females with the same level of background noise. Check
performance is the same. Changing Wo affects the width of each band,
which may affect bg energy estimates.
5/ Not sure what happens during long periods of voiced speech
e.g. "sshhhhhhh"
\*---------------------------------------------------------------------------*/
#define BG_THRESH 40.0 // only consider low level signals for bg_est
#define BG_BETA 0.1 // averaging filter constant
#define BG_MARGIN 6.0 // harmonics this far above BG noise are
// randomised. Helped make bg noise less
// spikey (impulsive) for mmt1, but speech was
// perhaps a little rougher.
void CCodec2::postfilter( MODEL *model, float *bg_est )
{
int m, uv;
float e, thresh;
/* determine average energy across spectrum */
e = 1E-12;
for(m=1; m<=model->L; m++)
e += model->A[m]*model->A[m];
assert(e > 0.0);
e = 10.0*log10f(e/model->L);
/* If beneath threshold, update bg estimate. The idea
of the threshold is to prevent updating during high level
speech. */
if ((e < BG_THRESH) && !model->voiced)
*bg_est = *bg_est*(1.0 - BG_BETA) + e*BG_BETA;
/* now mess with phases during voiced frames to make any harmonics
less than our background estimate unvoiced.
*/
uv = 0;
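/* bg_est is in dB (10*log10 of mean harmonic power), so convert
bg_est + margin back to a linear amplitude threshold */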
thresh = exp10f((*bg_est + BG_MARGIN)/20.0);
if (model->voiced)
for(m=1; m<=model->L; m++)
if (model->A[m] < thresh)
{
model->phi[m] = (TWO_PI/CODEC2_RAND_MAX)*(float)codec2_rand();
uv++;
}
}
C2CONST CCodec2::c2const_create(int Fs, float framelength_s)
{
C2CONST c2const;
assert((Fs == 8000) || (Fs == 16000));
c2const.Fs = Fs;
c2const.n_samp = round(Fs*framelength_s);
c2const.max_amp = floor(Fs*P_MAX_S/2);
c2const.p_min = floor(Fs*P_MIN_S);
c2const.p_max = floor(Fs*P_MAX_S);
c2const.m_pitch = floor(Fs*M_PITCH_S);
c2const.Wo_min = TWO_PI/c2const.p_max;
c2const.Wo_max = TWO_PI/c2const.p_min;
if (Fs == 8000)
{
c2const.nw = 279;
}
else
{
c2const.nw = 511; /* actually a bit shorter in time but lets us maintain constant FFT size */
}
c2const.tw = Fs*TW_S;
/*
fprintf(stderr, "max_amp: %d m_pitch: %d\n", c2const.n_samp, c2const.m_pitch);
fprintf(stderr, "p_min: %d p_max: %d\n", c2const.p_min, c2const.p_max);
fprintf(stderr, "Wo_min: %f Wo_max: %f\n", c2const.Wo_min, c2const.Wo_max);
fprintf(stderr, "nw: %d tw: %d\n", c2const.nw, c2const.tw);
*/
return c2const;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: make_analysis_window
AUTHOR......: David Rowe
DATE CREATED: 11/5/94
Init function that generates the time domain analysis window and its DFT.
\*---------------------------------------------------------------------------*/
void CCodec2::make_analysis_window(C2CONST *c2const, FFT_STATE *fft_fwd_cfg, float w[], float W[])
{
float m;
std::complex<float> wshift[FFT_ENC];
int i,j;
int m_pitch = c2const->m_pitch;
int nw = c2const->nw;
/*
Generate Hamming window centered on M-sample pitch analysis window
0            M/2           M-1
|-------------|-------------|
      |-------|-------|
          nw samples
All our analysis/synthesis is centred on the M/2 sample.
*/
m = 0.0;
for(i=0; i<m_pitch/2-nw/2; i++)
w[i] = 0.0;
for(i=m_pitch/2-nw/2,j=0; i<m_pitch/2+nw/2; i++,j++)
{
w[i] = 0.5 - 0.5*cosf(TWO_PI*j/(nw-1));
m += w[i]*w[i];
}
for(i=m_pitch/2+nw/2; i<m_pitch; i++)
w[i] = 0.0;

/* Normalise - makes freq domain amplitude estimation straight forward */
m = 1.0/sqrtf(m*FFT_ENC);
for(i=0; i<m_pitch; i++)
w[i] *= m;

/* ... */

std::complex<float> temp[FFT_ENC];
for(i=0; i<FFT_ENC; i++)
{
wshift[i] = std::complex<float>(0.0f, 0.0f);
}
for(i=0; i<nw/2; i++)
wshift[i].real(w[i+m_pitch/2]);
for(i=FFT_ENC-nw/2,j=m_pitch/2-nw/2; i<FFT_ENC; i++,j++)
wshift[i].real(w[j]);

/* ... DFT of the shifted window to form W[] ... */

void CCodec2::dft_speech(C2CONST *c2const, FFT_STATE &fft_fwd_cfg, std::complex<float> Sw[], float Sn[], float w[])
{
int i;
int m_pitch = c2const->m_pitch;
int nw = c2const->nw;
for(i=0; i<FFT_ENC; i++)
{
Sw[i] = std::complex<float>(0.0f, 0.0f);
}
/* Centre analysis window on time axis, we need to arrange input
to FFT this way to make FFT phases correct */
/* move 2nd half to start of FFT input vector */
for(i=0; i<nw/2; i++)
Sw[i].real(Sn[i+m_pitch/2]*w[i+m_pitch/2]);

/* move 1st half to end of FFT input vector */
for(i=0; i<nw/2; i++)
Sw[FFT_ENC-nw/2+i].real(Sn[i+m_pitch/2-nw/2]*w[i+m_pitch/2-nw/2]);

/* ... in-place FFT of Sw[] ... */

void CCodec2::two_stage_pitch_refinement(C2CONST *c2const, MODEL *model, std::complex<float> Sw[])
{
float pmin,pmax,pstep; /* pitch refinement minimum, maximum and step */
/* Coarse refinement */
pmax = TWO_PI/model->Wo + 5;
pmin = TWO_PI/model->Wo - 5;
pstep = 1.0;
hs_pitch_refinement(model, Sw, pmin, pmax, pstep);
/* Fine refinement */
pmax = TWO_PI/model->Wo + 1;
pmin = TWO_PI/model->Wo - 1;
pstep = 0.25;
hs_pitch_refinement(model,Sw,pmin,pmax,pstep);
/* Limit range */
if (model->Wo < TWO_PI/c2const->p_max)
model->Wo = TWO_PI/c2const->p_max;
if (model->Wo > TWO_PI/c2const->p_min)
model->Wo = TWO_PI/c2const->p_min;
model->L = floorf(PI/model->Wo);
/* trap occasional round off issues with floorf() */
if (model->Wo*model->L >= 0.95*PI)
{
model->L--;
}
assert(model->Wo*model->L < PI);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: hs_pitch_refinement
AUTHOR......: David Rowe
DATE CREATED: 27/5/94
Harmonic sum pitch refinement function.
pmin pitch search range minimum
pmax pitch search range maximum
step pitch search step size
model current pitch estimate in model.Wo
model refined pitch estimate in model.Wo
\*---------------------------------------------------------------------------*/
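/*
The refined pitch maximises the harmonic sum

E(Wo) = sum_{m=1..L} |Sw[b_m]|^2, b_m = round(m*Wo*FFT_ENC/TWO_PI)

over candidate pitch periods p = pmin..pmax (Wo = TWO_PI/p) in steps of pstep.
*/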
void CCodec2::hs_pitch_refinement(MODEL *model, std::complex<float> Sw[], float pmin, float pmax, float pstep)
{
int m; /* loop variable */
int b; /* bin for current harmonic centre */
float E; /* energy for current pitch*/
float Wo; /* current "test" fundamental freq. */
float Wom; /* Wo that maximises E */
float Em; /* maximum energy */
float r, one_on_r; /* number of rads/bin */
float p; /* current pitch */
/* Initialisation */
model->L = PI/model->Wo; /* use initial pitch est. for L */
Wom = model->Wo;
Em = 0.0;
r = TWO_PI/FFT_ENC;
one_on_r = 1.0/r;
/* Determine harmonic sum for a range of Wo values */
for(p=pmin; p<=pmax; p+=pstep)
{
E = 0.0;
Wo = TWO_PI/p;
/* Sum harmonic magnitudes */
for(m=1; m<=model->L; m++)
{
b = (int)(m*Wo*one_on_r + 0.5);
E += Sw[b].real() * Sw[b].real() + Sw[b].imag() * Sw[b].imag();
}
/* Compare to see if this is a maximum */
if (E > Em)
{
Em = E;
Wom = Wo;
}
}
model->Wo = Wom;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: estimate_amplitudes
AUTHOR......: David Rowe
DATE CREATED: 27/5/94
Estimates the complex amplitudes of the harmonics.
\*---------------------------------------------------------------------------*/
void CCodec2::estimate_amplitudes(MODEL *model, std::complex<float> Sw[], int est_phase)
{
int i,m; /* loop variables */
int am,bm; /* bounds of current harmonic */
float den; /* denominator of amplitude expression */
float r = TWO_PI/FFT_ENC;
float one_on_r = 1.0/r;
for(m=1; m<=model->L; m++)
{
/* Estimate amplitude of harmonic */
den = 0.0;
am = (int)((m - 0.5)*model->Wo*one_on_r + 0.5);
bm = (int)((m + 0.5)*model->Wo*one_on_r + 0.5);
for(i=am; i<bm; i++)
den += Sw[i].real()*Sw[i].real() + Sw[i].imag()*Sw[i].imag();
model->A[m] = sqrtf(den);
if (est_phase)
{
int b = (int)(m*model->Wo/r + 0.5); /* DFT bin of centre of current harmonic */
/* Estimate phase of harmonic, this is expensive in CPU for
embedded devices so we make it an option */
model->phi[m] = atan2f(Sw[b].imag(), Sw[b].real());
}
}
}
/*---------------------------------------------------------------------------*\
est_voicing_mbe()
Returns the error of the MBE cost function for a given F0.
Note: I think a lot of the operations below can be simplified as
W[].imag = 0 and has been normalised such that den always equals 1.
\*---------------------------------------------------------------------------*/
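/*
For each harmonic band below 1000 Hz a single complex amplitude Am is
fitted assuming the band is fully voiced:

Am = sum_m Sw[m]*W[offset+m] / sum_m W[offset+m]^2
error += sum_m |Sw[m] - Am*W[offset+m]|^2

The frame is declared voiced when snr = 10*log10(sig/error) exceeds
V_THRESH.
*/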
float CCodec2::est_voicing_mbe( C2CONST *c2const, MODEL *model, std::complex<float> Sw[], float W[])
{
int l,al,bl,m; /* loop variables */
std::complex<float> Am; /* amplitude sample for this band */
int offset; /* centers Hw[] about current harmonic */
float den; /* denominator of Am expression */
float error; /* accumulated error between original and synthesised */
float Wo;
float sig, snr;
float elow, ehigh, eratio;
float sixty;
std::complex<float> Ew(0, 0);
int l_1000hz = model->L*1000.0/(c2const->Fs/2);
sig = 1E-4;
for(l=1; l<=l_1000hz; l++)
{
sig += model->A[l]*model->A[l];
}
Wo = model->Wo;
error = 1E-4;
/* Just test across the harmonics in the first 1000 Hz */
for(l=1; l<=l_1000hz; l++)
{
Am = std::complex<float>(0.0f, 0.0f);
den = 0.0;
al = ceilf((l - 0.5)*Wo*FFT_ENC/TWO_PI);
bl = ceilf((l + 0.5)*Wo*FFT_ENC/TWO_PI);
/* Estimate amplitude of harmonic assuming harmonic is totally voiced */
offset = FFT_ENC/2 - l*Wo*FFT_ENC/TWO_PI + 0.5;
for(m=al; m<bl; m++)
{
Am += Sw[m]*W[offset+m];
den += W[offset+m]*W[offset+m];
}
Am /= den;

/* Determine error between estimated harmonic and original */
for(m=al; m<bl; m++)
{
Ew = Sw[m] - Am*W[offset+m];
error += Ew.real()*Ew.real() + Ew.imag()*Ew.imag();
}
}

snr = 10.0*log10f(sig/error);
if (snr > V_THRESH)
model->voiced = 1;
else
model->voiced = 0;
/* post processing, helps clean up some voicing errors ------------------*/
/*
Determine the ratio of low frequency to high frequency energy,
voiced speech tends to be dominated by low frequency energy,
unvoiced by high frequency. This measure can be used to
determine if we have made any gross errors.
*/
int l_2000hz = model->L*2000.0/(c2const->Fs/2);
int l_4000hz = model->L*4000.0/(c2const->Fs/2);
elow = ehigh = 1E-4;
for(l=1; l<=l_2000hz; l++)
{
elow += model->A[l]*model->A[l];
}
for(l=l_2000hz; l<=l_4000hz; l++)
{
ehigh += model->A[l]*model->A[l];
}
eratio = 10.0*log10f(elow/ehigh);
/* Look for Type 1 errors, strongly V speech that has been
accidentally declared UV */
if (model->voiced == 0)
if (eratio > 10.0)
model->voiced = 1;
/* Look for Type 2 errors, strongly UV speech that has been
accidentally declared V */
if (model->voiced == 1)
{
if (eratio < -10.0)
model->voiced = 0;
/* A common source of Type 2 errors is the pitch estimator
gives a low (50Hz) estimate for UV speech, which gives a
good match with noise due to the close harmonic spacing.
These errors are much more common than people with 50Hz
pitch, so we have just a small eratio threshold. */
sixty = 60.0*TWO_PI/c2const->Fs;
if ((eratio < -4.0) && (model->Wo <= sixty))
model->voiced = 0;
}
//printf(" v: %d snr: %f eratio: %3.2f %f\n",model->voiced,snr,eratio,dF0);
return snr;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: make_synthesis_window
AUTHOR......: David Rowe
DATE CREATED: 11/5/94
Init function that generates the trapezoidal (Parzen) synthesis window.
\*---------------------------------------------------------------------------*/
void CCodec2::make_synthesis_window(C2CONST *c2const, float Pn[])
{
int i;
float win;
int n_samp = c2const->n_samp;
int tw = c2const->tw;
/* Generate Parzen window in time domain */
win = 0.0;
for(i=0; i<n_samp/2-tw; i++)
Pn[i] = 0.0;
for(i=n_samp/2-tw; i<n_samp/2+tw; win+=1.0/(2*tw), i++)
Pn[i] = win;
for(i=n_samp/2+tw; i<3*n_samp/2-tw; i++)
Pn[i] = 1.0;
win = 1.0;
for(i=3*n_samp/2-tw; i<3*n_samp/2+tw; win-=1.0/(2*tw), i++)
Pn[i] = win;
for(i=3*n_samp/2+tw; i<2*n_samp; i++)
Pn[i] = 0.0;
}

/* ... */

void CCodec2::synthesise(int n_samp, FFT_STATE *fftr_inv_cfg, float Sn_[], MODEL *model, float Pn[], int shift)
{
int i, l, b; /* loop variables */
std::complex<float> Sw_[FFT_DEC/2+1]; /* DFT of synthesised signal */
float sw_[FFT_DEC]; /* synthesised signal */
if (shift)
{
/* Update memories */
for(i=0; i<n_samp-1; i++)
Sn_[i] = Sn_[i+n_samp];
Sn_[n_samp-1] = 0.0;
}

for(i=0; i<FFT_DEC/2+1; i++)
Sw_[i] = std::complex<float>(0.0f, 0.0f);

/* Now set up frequency domain synthesised speech */
for(l=1; l<=model->L; l++)
{
b = (int)(l*model->Wo*FFT_DEC/TWO_PI + 0.5);
if (b > ((FFT_DEC/2)-1))
{
b = (FFT_DEC/2)-1;
}
Sw_[b] = std::polar(model->A[l], model->phi[l]);
}
/* Perform inverse DFT */
kiss.fftri(*fftr_inv_cfg, Sw_,sw_);
/* Overlap add to previous samples */
for(i=0; i<n_samp-1; i++)
Sn_[i] += sw_[FFT_DEC-n_samp+1+i]*Pn[i];

/* ... */

void CCodec2::interp_Wo2(MODEL *interp, MODEL *prev, MODEL *next, float weight, float Wo_min)
{
/* trap corner case where voicing est is probably wrong */
if (interp->voiced && !prev->voiced && !next->voiced)
{
interp->voiced = 0;
}
/* Wo depends on voicing of this and adjacent frames */
if (interp->voiced)
{
if (prev->voiced && next->voiced)
interp->Wo = (1.0 - weight)*prev->Wo + weight*next->Wo;
if (!prev->voiced && next->voiced)
interp->Wo = next->Wo;
if (prev->voiced && !next->voiced)
interp->Wo = prev->Wo;
}
else
{
interp->Wo = Wo_min;
}
interp->L = PI/interp->Wo;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: interp_energy()
AUTHOR......: David Rowe
DATE CREATED: 22 May 2012
Interpolates centre 10ms sample of energy given two samples 20ms
apart.
\*---------------------------------------------------------------------------*/
float CCodec2::interp_energy(float prev_e, float next_e)
{
//return powf(10.0, (log10f(prev_e) + log10f(next_e))/2.0);
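// sqrtf(prev_e*next_e) == powf(10.0, (log10f(prev_e)+log10f(next_e))/2.0),
// so the two forms are mathematically identical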
return sqrtf(prev_e * next_e); // same result as the log-domain average, faster to compute
}
/*---------------------------------------------------------------------------*\
FUNCTION....: interpolate_lsp_ver2()
AUTHOR......: David Rowe
DATE CREATED: 22 May 2012
Weighted interpolation of LSPs.
\*---------------------------------------------------------------------------*/
void CCodec2::interpolate_lsp_ver2(float interp[], float prev[], float next[], float weight, int order)
{
int i;
for(i=0; i<order; i++)
interp[i] = (1.0 - weight)*prev[i] + weight*next[i];
}