Commit 4460a860 authored by J.R. Mauro, committed by Greg Kroah-Hartman

Staging: Lindent the echo driver

Lindent drivers/staging/echo*

Signed-off by: J.R. Mauro <jrm8005@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
parent 786ed801
......@@ -38,11 +38,12 @@ static __inline__ int top_bit(unsigned int bits)
{
int res;
__asm__ (" xorl %[res],%[res];\n"
__asm__(" xorl %[res],%[res];\n"
" decl %[res];\n"
" bsrl %[bits],%[res]\n"
: [res] "=&r" (res)
: [bits] "rm" (bits));
:[res] "=&r" (res)
:[bits] "rm"(bits)
);
return res;
}
......@@ -53,11 +54,12 @@ static __inline__ int bottom_bit(unsigned int bits)
{
int res;
__asm__ (" xorl %[res],%[res];\n"
__asm__(" xorl %[res],%[res];\n"
" decl %[res];\n"
" bsfl %[bits],%[res]\n"
: [res] "=&r" (res)
: [bits] "rm" (bits));
:[res] "=&r" (res)
:[bits] "rm"(bits)
);
return res;
}
#else
......@@ -68,28 +70,23 @@ static __inline__ int top_bit(unsigned int bits)
if (bits == 0)
return -1;
i = 0;
if (bits & 0xFFFF0000)
{
if (bits & 0xFFFF0000) {
bits &= 0xFFFF0000;
i += 16;
}
if (bits & 0xFF00FF00)
{
if (bits & 0xFF00FF00) {
bits &= 0xFF00FF00;
i += 8;
}
if (bits & 0xF0F0F0F0)
{
if (bits & 0xF0F0F0F0) {
bits &= 0xF0F0F0F0;
i += 4;
}
if (bits & 0xCCCCCCCC)
{
if (bits & 0xCCCCCCCC) {
bits &= 0xCCCCCCCC;
i += 2;
}
if (bits & 0xAAAAAAAA)
{
if (bits & 0xAAAAAAAA) {
bits &= 0xAAAAAAAA;
i += 1;
}
......@@ -103,28 +100,23 @@ static __inline__ int bottom_bit(unsigned int bits)
if (bits == 0)
return -1;
i = 32;
if (bits & 0x0000FFFF)
{
if (bits & 0x0000FFFF) {
bits &= 0x0000FFFF;
i -= 16;
}
if (bits & 0x00FF00FF)
{
if (bits & 0x00FF00FF) {
bits &= 0x00FF00FF;
i -= 8;
}
if (bits & 0x0F0F0F0F)
{
if (bits & 0x0F0F0F0F) {
bits &= 0x0F0F0F0F;
i -= 4;
}
if (bits & 0x33333333)
{
if (bits & 0x33333333) {
bits &= 0x33333333;
i -= 2;
}
if (bits & 0x55555555)
{
if (bits & 0x55555555) {
bits &= 0x55555555;
i -= 1;
}
......@@ -139,7 +131,8 @@ static __inline__ uint8_t bit_reverse8(uint8_t x)
{
#if defined(__i386__) || defined(__x86_64__)
/* If multiply is fast */
return ((x*0x0802U & 0x22110U) | (x*0x8020U & 0x88440U))*0x10101U >> 16;
return ((x * 0x0802U & 0x22110U) | (x * 0x8020U & 0x88440U)) *
0x10101U >> 16;
#else
/* If multiply is slow, but we have a barrel shifter */
x = (x >> 4) | (x << 4);
......
......@@ -74,7 +74,6 @@
Steve also has some nice notes on echo cancellers in echo.h
References:
[1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
......@@ -124,9 +123,9 @@
/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
#ifdef __bfin__
static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean,
int shift)
{
int i, j;
int offset1;
......@@ -151,10 +150,9 @@ static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift
//asm("st:");
n = ec->taps;
for (i = 0, j = offset2; i < n; i++, j++)
{
for (i = 0, j = offset2; i < n; i++, j++) {
exp = *phist++ * factor;
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
}
//asm("en:");
......@@ -198,7 +196,8 @@ static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift
*/
#else
static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean,
int shift)
{
int i;
......@@ -217,20 +216,17 @@ static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean, int shift
offset2 = ec->curr_pos;
offset1 = ec->taps - offset2;
for (i = ec->taps - 1; i >= offset1; i--)
{
exp = (ec->fir_state_bg.history[i - offset1]*factor);
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
for (i = ec->taps - 1; i >= offset1; i--) {
exp = (ec->fir_state_bg.history[i - offset1] * factor);
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
}
for ( ; i >= 0; i--)
{
exp = (ec->fir_state_bg.history[i + offset2]*factor);
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
for (; i >= 0; i--) {
exp = (ec->fir_state_bg.history[i + offset2] * factor);
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
}
}
#endif
struct oslec_state *oslec_create(int len, int adaption_mode)
{
struct oslec_state *ec;
......@@ -245,19 +241,16 @@ struct oslec_state *oslec_create(int len, int adaption_mode)
ec->curr_pos = ec->taps - 1;
for (i = 0; i < 2; i++) {
ec->fir_taps16[i] = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
ec->fir_taps16[i] =
kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
if (!ec->fir_taps16[i])
goto error_oom;
}
fir16_create(&ec->fir_state,
ec->fir_taps16[0],
ec->taps);
fir16_create(&ec->fir_state_bg,
ec->fir_taps16[1],
ec->taps);
fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
for(i=0; i<5; i++) {
for (i = 0; i < 5; i++) {
ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
}
......@@ -279,13 +272,14 @@ struct oslec_state *oslec_create(int len, int adaption_mode)
return ec;
error_oom:
error_oom:
for (i = 0; i < 2; i++)
kfree(ec->fir_taps16[i]);
kfree(ec);
return NULL;
}
EXPORT_SYMBOL_GPL(oslec_create);
void oslec_free(struct oslec_state *ec)
......@@ -299,12 +293,14 @@ void oslec_free(struct oslec_state *ec)
kfree(ec->snapshot);
kfree(ec);
}
EXPORT_SYMBOL_GPL(oslec_free);
void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
{
ec->adaption_mode = adaption_mode;
}
EXPORT_SYMBOL_GPL(oslec_adaption_mode);
void oslec_flush(struct oslec_state *ec)
......@@ -326,16 +322,19 @@ void oslec_flush(struct oslec_state *ec)
ec->fir_state.curr_pos = ec->taps - 1;
ec->fir_state_bg.curr_pos = ec->taps - 1;
for (i = 0; i < 2; i++)
memset(ec->fir_taps16[i], 0, ec->taps*sizeof(int16_t));
memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
ec->curr_pos = ec->taps - 1;
ec->Pstates = 0;
}
EXPORT_SYMBOL_GPL(oslec_flush);
void oslec_snapshot(struct oslec_state *ec) {
memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps*sizeof(int16_t));
void oslec_snapshot(struct oslec_state *ec)
{
memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
}
EXPORT_SYMBOL_GPL(oslec_snapshot);
/* Dual Path Echo Canceller ------------------------------------------------*/
......@@ -350,9 +349,10 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
starts clipping. Another possible way to handle this would be the
filter coefficent scaling. */
ec->tx = tx; ec->rx = rx;
tx >>=1;
rx >>=1;
ec->tx = tx;
ec->rx = rx;
tx >>= 1;
rx >>= 1;
/*
Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
......@@ -378,13 +378,15 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
any saturation should not markedly affect the downstream processing. */
tmp -= (tmp >> 4);
#endif
ec->rx_1 += -(ec->rx_1>>DC_LOG2BETA) + tmp - ec->rx_2;
ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
/* hard limit filter to prevent clipping. Note that at this stage
rx should be limited to +/- 16383 due to right shift above */
tmp1 = ec->rx_1 >> 15;
if (tmp1 > 16383) tmp1 = 16383;
if (tmp1 < -16383) tmp1 = -16383;
if (tmp1 > 16383)
tmp1 = 16383;
if (tmp1 < -16383)
tmp1 = -16383;
rx = tmp1;
ec->rx_2 = tmp;
}
......@@ -398,36 +400,38 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
/* efficient "out with the old and in with the new" algorithm so
we don't have to recalculate over the whole block of
samples. */
new = (int)tx * (int)tx;
new = (int)tx *(int)tx;
old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
(int)ec->fir_state.history[ec->fir_state.curr_pos];
ec->Pstates += ((new - old) + (1<<ec->log2taps)) >> ec->log2taps;
if (ec->Pstates < 0) ec->Pstates = 0;
ec->Pstates +=
((new - old) + (1 << ec->log2taps)) >> ec->log2taps;
if (ec->Pstates < 0)
ec->Pstates = 0;
}
/* Calculate short term average levels using simple single pole IIRs */
ec->Ltxacc += abs(tx) - ec->Ltx;
ec->Ltx = (ec->Ltxacc + (1<<4)) >> 5;
ec->Ltx = (ec->Ltxacc + (1 << 4)) >> 5;
ec->Lrxacc += abs(rx) - ec->Lrx;
ec->Lrx = (ec->Lrxacc + (1<<4)) >> 5;
ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;
/* Foreground filter ---------------------------------------------------*/
/* Foreground filter --------------------------------------------------- */
ec->fir_state.coeffs = ec->fir_taps16[0];
echo_value = fir16(&ec->fir_state, tx);
ec->clean = rx - echo_value;
ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
ec->Lclean = (ec->Lcleanacc + (1<<4)) >> 5;
ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;
/* Background filter ---------------------------------------------------*/
/* Background filter --------------------------------------------------- */
echo_value = fir16(&ec->fir_state_bg, tx);
clean_bg = rx - echo_value;
ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
ec->Lclean_bg = (ec->Lclean_bgacc + (1<<4)) >> 5;
ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;
/* Background Filter adaption -----------------------------------------*/
/* Background Filter adaption ----------------------------------------- */
/* Almost always adap bg filter, just simple DT and energy
detection to minimise adaption in cases of strong double talk.
......@@ -488,41 +492,38 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
if (ec->nonupdate_dwell)
ec->nonupdate_dwell--;
/* Transfer logic ------------------------------------------------------*/
/* Transfer logic ------------------------------------------------------ */
/* These conditions are from the dual path paper [1], I messed with
them a bit to improve performance. */
if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
(ec->nonupdate_dwell == 0) &&
(8*ec->Lclean_bg < 7*ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
(8*ec->Lclean_bg < ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx) */ )
{
(8 * ec->Lclean_bg <
7 * ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
(8 * ec->Lclean_bg <
ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx) */ ) {
if (ec->cond_met == 6) {
/* BG filter has had better results for 6 consecutive samples */
ec->adapt = 1;
memcpy(ec->fir_taps16[0], ec->fir_taps16[1], ec->taps*sizeof(int16_t));
}
else
memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
ec->taps * sizeof(int16_t));
} else
ec->cond_met++;
}
else
} else
ec->cond_met = 0;
/* Non-Linear Processing ---------------------------------------------------*/
/* Non-Linear Processing --------------------------------------------------- */
ec->clean_nlp = ec->clean;
if (ec->adaption_mode & ECHO_CAN_USE_NLP)
{
if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
/* Non-linear processor - a fancy way to say "zap small signals, to avoid
residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
if ((16*ec->Lclean < ec->Ltx))
{
if ((16 * ec->Lclean < ec->Ltx)) {
/* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
if (ec->adaption_mode & ECHO_CAN_USE_CNG)
{
if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
ec->cng_level = ec->Lbgn;
/* Very elementary comfort noise generation. Just random
......@@ -533,27 +534,26 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
high level or look at spectrum.
*/
ec->cng_rndnum = 1664525U*ec->cng_rndnum + 1013904223U;
ec->cng_filter = ((ec->cng_rndnum & 0xFFFF) - 32768 + 5*ec->cng_filter) >> 3;
ec->clean_nlp = (ec->cng_filter*ec->cng_level*8) >> 14;
ec->cng_rndnum =
1664525U * ec->cng_rndnum + 1013904223U;
ec->cng_filter =
((ec->cng_rndnum & 0xFFFF) - 32768 +
5 * ec->cng_filter) >> 3;
ec->clean_nlp =
(ec->cng_filter * ec->cng_level * 8) >> 14;
}
else if (ec->adaption_mode & ECHO_CAN_USE_CLIP)
{
} else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
/* This sounds much better than CNG */
if (ec->clean_nlp > ec->Lbgn)
ec->clean_nlp = ec->Lbgn;
if (ec->clean_nlp < -ec->Lbgn)
ec->clean_nlp = -ec->Lbgn;
}
else
{
} else {
/* just mute the residual, doesn't sound very good, used mainly
in G168 tests */
ec->clean_nlp = 0;
}
}
else {
} else {
/* Background noise estimator. I tried a few algorithms
here without much luck. This very simple one seems to
work best, we just average the level using a slow (1 sec
......@@ -564,7 +564,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
*/
if (ec->Lclean < 40) {
ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
ec->Lbgn = (ec->Lbgn_acc + (1<<11)) >> 12;
ec->Lbgn = (ec->Lbgn_acc + (1 << 11)) >> 12;
}
}
}
......@@ -581,6 +581,7 @@ int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
return (int16_t) ec->clean_nlp << 1;
}
EXPORT_SYMBOL_GPL(oslec_update);
/* This function is seperated from the echo canceller is it is usually called
......@@ -604,7 +605,8 @@ EXPORT_SYMBOL_GPL(oslec_update);
precision, which noise shapes things, giving very clean DC removal.
*/
int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) {
int16_t oslec_hpf_tx(struct oslec_state * ec, int16_t tx)
{
int tmp, tmp1;
if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
......@@ -616,16 +618,19 @@ int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) {
any saturation should not markedly affect the downstream processing. */
tmp -= (tmp >> 4);
#endif
ec->tx_1 += -(ec->tx_1>>DC_LOG2BETA) + tmp - ec->tx_2;
ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
tmp1 = ec->tx_1 >> 15;
if (tmp1 > 32767) tmp1 = 32767;
if (tmp1 < -32767) tmp1 = -32767;
if (tmp1 > 32767)
tmp1 = 32767;
if (tmp1 < -32767)
tmp1 = -32767;
tx = tmp1;
ec->tx_2 = tmp;
}
return tx;
}
EXPORT_SYMBOL_GPL(oslec_hpf_tx);
MODULE_LICENSE("GPL");
......
......@@ -124,9 +124,8 @@ a minor burden.
G.168 echo canceller descriptor. This defines the working state for a line
echo canceller.
*/
struct oslec_state
{
int16_t tx,rx;
struct oslec_state {
int16_t tx, rx;
int16_t clean;
int16_t clean_nlp;
......
......@@ -72,8 +72,7 @@
16 bit integer FIR descriptor. This defines the working state for a single
instance of an FIR filter using 16 bit integer coefficients.
*/
typedef struct
{
typedef struct {
int taps;
int curr_pos;
const int16_t *coeffs;
......@@ -85,8 +84,7 @@ typedef struct
instance of an FIR filter using 32 bit integer coefficients, and filtering
16 bit integer data.
*/
typedef struct
{
typedef struct {
int taps;
int curr_pos;
const int32_t *coeffs;
......@@ -97,39 +95,37 @@ typedef struct
Floating point FIR descriptor. This defines the working state for a single
instance of an FIR filter using floating point coefficients and data.
*/
typedef struct
{
typedef struct {
int taps;
int curr_pos;
const float *coeffs;
float *history;
} fir_float_state_t;
static __inline__ const int16_t *fir16_create(fir16_state_t *fir,
const int16_t *coeffs,
int taps)
static __inline__ const int16_t *fir16_create(fir16_state_t * fir,
const int16_t * coeffs, int taps)
{
fir->taps = taps;
fir->curr_pos = taps - 1;
fir->coeffs = coeffs;
#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__)
fir->history = kcalloc(2*taps, sizeof(int16_t), GFP_KERNEL);
fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL);
#else
fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
#endif
return fir->history;
}
static __inline__ void fir16_flush(fir16_state_t *fir)
static __inline__ void fir16_flush(fir16_state_t * fir)
{
#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__)
memset(fir->history, 0, 2*fir->taps*sizeof(int16_t));
memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t));
#else
memset(fir->history, 0, fir->taps*sizeof(int16_t));
memset(fir->history, 0, fir->taps * sizeof(int16_t));
#endif
}
static __inline__ void fir16_free(fir16_state_t *fir)
static __inline__ void fir16_free(fir16_state_t * fir)
{
kfree(fir->history);
}
......@@ -141,9 +137,7 @@ static inline int32_t dot_asm(short *x, short *y, int len)
len--;
__asm__
(
"I0 = %1;\n\t"
__asm__("I0 = %1;\n\t"
"I1 = %2;\n\t"
"A0 = 0;\n\t"
"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
......@@ -154,16 +148,16 @@ static inline int32_t dot_asm(short *x, short *y, int len)
"A0 += R0.L*R1.L (IS);\n\t"
"R0 = A0;\n\t"
"%0 = R0;\n\t"
: "=&d" (dot)
: "a" (x), "a" (y), "a" (len)
: "I0", "I1", "A1", "A0", "R0", "R1"
:"=&d"(dot)
:"a"(x), "a"(y), "a"(len)
:"I0", "I1", "A1", "A0", "R0", "R1"
);
return dot;
}
#endif
static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
static __inline__ int16_t fir16(fir16_state_t * fir, int16_t sample)
{
int32_t y;
#if defined(USE_MMX)
......@@ -175,12 +169,11 @@ static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
fir->history[fir->curr_pos + fir->taps] = sample;
mmx_coeffs = (mmx_t *) fir->coeffs;
mmx_hist = (mmx_t *) &fir->history[fir->curr_pos];
mmx_hist = (mmx_t *) & fir->history[fir->curr_pos];
i = fir->taps;
pxor_r2r(mm4, mm4);
/* 8 samples per iteration, so the filter must be a multiple of 8 long. */
while (i > 0)
{
while (i > 0) {
movq_m2r(mmx_coeffs[0], mm0);
movq_m2r(mmx_coeffs[1], mm2);
movq_m2r(mmx_hist[0], mm1);
......@@ -207,12 +200,11 @@ static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
fir->history[fir->curr_pos + fir->taps] = sample;
xmm_coeffs = (xmm_t *) fir->coeffs;
xmm_hist = (xmm_t *) &fir->history[fir->curr_pos];
xmm_hist = (xmm_t *) & fir->history[fir->curr_pos];
i = fir->taps;
pxor_r2r(xmm4, xmm4);
/* 16 samples per iteration, so the filter must be a multiple of 16 long. */
while (i > 0)
{
while (i > 0) {
movdqu_m2r(xmm_coeffs[0], xmm0);
movdqu_m2r(xmm_coeffs[1], xmm2);
movdqu_m2r(xmm_hist[0], xmm1);
......@@ -235,7 +227,8 @@ static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
#elif defined(__bfin__)
fir->history[fir->curr_pos] = sample;
fir->history[fir->curr_pos + fir->taps] = sample;
y = dot_asm((int16_t*)fir->coeffs, &fir->history[fir->curr_pos], fir->taps);
y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos],
fir->taps);
#else
int i;
int offset1;
......@@ -247,9 +240,9 @@ static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
offset1 = fir->taps - offset2;
y = 0;
for (i = fir->taps - 1; i >= offset1; i--)
y += fir->coeffs[i]*fir->history[i - offset1];
for ( ; i >= 0; i--)
y += fir->coeffs[i]*fir->history[i + offset2];
y += fir->coeffs[i] * fir->history[i - offset1];
for (; i >= 0; i--)
y += fir->coeffs[i] * fir->history[i + offset2];
#endif
if (fir->curr_pos <= 0)
fir->curr_pos = fir->taps;
......@@ -257,9 +250,8 @@ static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
return (int16_t) (y >> 15);
}
static __inline__ const int16_t *fir32_create(fir32_state_t *fir,
const int32_t *coeffs,
int taps)
static __inline__ const int16_t *fir32_create(fir32_state_t * fir,
const int32_t * coeffs, int taps)
{
fir->taps = taps;
fir->curr_pos = taps - 1;
......@@ -268,17 +260,17 @@ static __inline__ const int16_t *fir32_create(fir32_state_t *fir,
return fir->history;
}
static __inline__ void fir32_flush(fir32_state_t *fir)
static __inline__ void fir32_flush(fir32_state_t * fir)
{
memset(fir->history, 0, fir->taps*sizeof(int16_t));
memset(fir->history, 0, fir->taps * sizeof(int16_t));
}
static __inline__ void fir32_free(fir32_state_t *fir)
static __inline__ void fir32_free(fir32_state_t * fir)
{
kfree(fir->history);
}
static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample)
static __inline__ int16_t fir32(fir32_state_t * fir, int16_t sample)
{
int i;
int32_t y;
......@@ -290,9 +282,9 @@ static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample)
offset1 = fir->taps - offset2;
y = 0;
for (i = fir->taps - 1; i >= offset1; i--)
y += fir->coeffs[i]*fir->history[i - offset1];
for ( ; i >= 0; i--)
y += fir->coeffs[i]*fir->history[i + offset2];
y += fir->coeffs[i] * fir->history[i - offset1];
for (; i >= 0; i--)
y += fir->coeffs[i] * fir->history[i + offset2];
if (fir->curr_pos <= 0)
fir->curr_pos = fir->taps;
fir->curr_pos--;
......
......@@ -44,7 +44,6 @@ typedef union {
char b[16];
} xmm_t;
#define mmx_i2r(op,imm,reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
......@@ -63,7 +62,6 @@ typedef union {
#define mmx_r2r(op,regs,regd) \
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
#define emms() __asm__ __volatile__ ("emms")
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
......@@ -192,16 +190,13 @@ typedef union {
#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
/* 3DNOW extensions */
#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
/* AMD MMX extensions - also available in intel SSE */
#define mmx_m2ri(op,mem,reg,imm) \
__asm__ __volatile__ (#op " %1, %0, %%" #reg \
: /* nothing */ \
......@@ -216,7 +211,6 @@ typedef union {
: /* nothing */ \
: "m" (mem))
#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
......@@ -284,5 +278,4 @@ typedef union {
#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
#endif /* AVCODEC_I386MMX_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment