/*
 *      fir_x86.h -- digital filter routines (to be) optimized for x86 CPUs
 *
 *	Copyright (C) 1996  
 *          Thomas Sailer (sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu)
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 2 of the License, or
 *	(at your option) any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  The digital filter routines. This is the most performance critical
 *  part of the fax demodulator. These routines are separated to facilitate
 *  optimizing them (i.e. writing them in assembler and carefully
 *  hand scheduling instructions).
 *
 *  Note that float_fir_symm and float_fir_symm_hilbert do the same as
 *  float_fir. They expect however that the coefficients have even symmetry
 *  (float_fir_symm, coeff[i] == coeff[n-1-i]) or odd symmetry
 *  (float_fir_symm_hilbert, coeff[i] == -coeff[n-1-i]). Depending on the CPU 
 *  architecture, these routines may just be aliases for float_fir, or exploit
 *  symmetry of the coefficients. Neither routine should assume any symmetry
 *  in state.
 */

/* ---------------------------------------------------------------------- */

#include <string.h>

/* ---------------------------------------------------------------------- */
/*
 * the x86 routines
 */
/* ---------------------------------------------------------------------- */

/*
 * Dot product of two float vectors.
 *
 * Multiplies fp1[k] by fp2[k] for k = 0 .. n-1 and returns the running
 * sum, accumulated in single precision in ascending index order.
 * Returns 0 when n <= 0 (neither array is read in that case).
 */
__FP float float_conv(const float *fp1, const float *fp2, int n)
{
	float acc = 0;
	int k;

	for (k = 0; k < n; k++)
		acc += fp1[k] * fp2[k];
	return acc;
}

/* ---------------------------------------------------------------------- */

/*
 * One step of a direct-form FIR filter.
 *
 * state is the delay line of n samples, newest at state[0]. The call
 * shifts every sample one slot towards the end (the sample in state[n-1]
 * is dropped), stores newval in state[0] and returns
 *
 *     sum(state[i] * coeff[i]), i = 0 .. n-1
 *
 * evaluated on the updated delay line. The products are accumulated
 * from the highest index down to index 0.
 *
 * For n <= 0 there is no delay line to update: nothing is read or
 * written and 0 is returned.
 */
__FP float float_fir(float newval, float *state, const float *coeff, int n)
{
	float res = 0;
	int i;

	/* an empty filter must not touch state[-1] / coeff[-1] */
	if (n <= 0)
		return res;

	/*
	 * Shift and accumulate in a single pass, oldest sample first.
	 * Indexing (rather than walking pointers down) avoids ever
	 * forming a pointer in front of the start of state.
	 */
	for (i = n - 1; i > 0; i--) {
		state[i] = state[i - 1];
		res += state[i] * coeff[i];
	}
	state[0] = newval;
	res += newval * coeff[0];
	return res;
}

/* ---------------------------------------------------------------------- */

/*
 * One step of an FIR filter whose coefficients have even symmetry
 * (coeff[i] == coeff[n-1-i]).
 *
 * The delay line is updated first: all samples move one slot towards
 * the end, the sample in state[n-1] is dropped and newval is stored in
 * state[0]. Then mirrored sample pairs are added before being multiplied
 * by their common coefficient, so only coeff[0 .. n/2-1] is read, plus
 * the center tap coeff[n/2] when n is odd.
 *
 * For n <= 0 nothing is read or written and 0 is returned.
 */
__FP float float_fir_symm(float newval, float *state, const float *coeff,
			  int n)
{
	float res = 0;
	int half = n / 2;
	int i;

	/*
	 * (n-1)*sizeof(float) would wrap to a huge size_t for n <= 0,
	 * so bail out before touching the delay line.
	 */
	if (n <= 0)
		return res;

	memmove(state + 1, state, (size_t)(n - 1) * sizeof(*state));
	state[0] = newval;

	/* fold the two halves of the delay line onto each other */
	for (i = 0; i < half; i++)
		res += (state[i] + state[n - 1 - i]) * coeff[i];
	/* odd length: the center sample has no mirror partner */
	if (n & 1)
		res += state[half] * coeff[half];
	return res;
}

/* ---------------------------------------------------------------------- */

/*
 * One step of an FIR filter whose coefficients have odd symmetry
 * (coeff[i] == -coeff[n-1-i]), e.g. a Hilbert transformer.
 *
 * The delay line is updated first: all samples move one slot towards
 * the end, the sample in state[n-1] is dropped and newval is stored in
 * state[0]. Then each mirrored sample pair is subtracted before being
 * multiplied by the coefficient of the leading half, so only
 * coeff[0 .. n/2-1] is read.
 *
 * For n <= 0 nothing is read or written and 0 is returned.
 */
__FP float float_fir_symm_hilbert(float newval, float *state,
				  const float *coeff, int n)
{
	float res = 0;
	int half = n / 2;
	int i;

	/*
	 * (n-1)*sizeof(float) would wrap to a huge size_t for n <= 0,
	 * so bail out before touching the delay line.
	 */
	if (n <= 0)
		return res;

	memmove(state + 1, state, (size_t)(n - 1) * sizeof(*state));
	state[0] = newval;

	for (i = 0; i < half; i++)
		res += (state[i] - state[n - 1 - i]) * coeff[i];
	/*
	 * the center tap of odd length hilbert FIR filters must be zero!
	 * (hence it is not added to the result)
	 */
	return res;
}

/* ---------------------------------------------------------------------- */
