/*---------------------------------------------------------------------------*\

    FILE....: echo.c

    Host based echo canceller based on two path model.


\*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*\

         Voicetronix Voice Processing Board (VPB) Software

         Copyright (C) 1999-2001 Voicetronix www.voicetronix.com.au

         This library is free software; you can redistribute it and/or
         modify it under the terms of the GNU Lesser General Public
         License as published by the Free Software Foundation; either
         version 2.1 of the License, or (at your option) any later version.

         This library is distributed in the hope that it will be useful,
         but WITHOUT ANY WARRANTY; without even the implied warranty of
         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
         Lesser General Public License for more details.

         You should have received a copy of the GNU Lesser General Public
         License along with this library; if not, write to the Free Software
         Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
	 USA

\*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*\

                                   INCLUDES

\*---------------------------------------------------------------------------*/

#include "echo.h"
#include "float_dotprod_x86.h"

/*---------------------------------------------------------------------------*\

                                   DEFINES

\*---------------------------------------------------------------------------*/

#define ALPHA     0.02   // short-term averaging coeff for the Ly/Ls/Le/Le2 level estimates
#define BETA      0.1 	 // adaptation constant; echo_open() copies it into pe->beta
#define CUTOFF    130.0  // -48dBm0: Ly must exceed this before the filter adapts; also the initial value of the level estimates
#define HANGT     600    // echo suppressor hangover, 75ms at Fs=8000Hz (only referenced inside #ifdef NOT_USED code)
#define THRESHOLD 0.125  // -18dB echo suppressor threshold, compared against Le/Ly
#define WINDOW    2000   // analysis window size (in samples) for the two path model
#define MINUS3DB  0.71   // -3dB threshold


// Plain C dot product, defined later in this file.
static float float_dotprod_vanilla(const float x[],const  float y[],unsigned int n);
// Signature shared by the interchangeable dot-product implementations.
typedef float (*fd_t)(const float x[],const  float y[], unsigned int n);
// Dot-product routine currently in use.  This is one file-scope pointer
// (not per-canceller state): echo_open() and echo_set_opt() assign it and
// echo() calls through it.
fd_t float_dotprod;
// Coefficient update helpers, defined later in this file.
static void update_loop(float a2[], float x, float y[], int n);
static void update_loop_unrolled(float a2[], float x, float y[], int n);
// Pseudo-random generator; only called from code guarded by COMF_NOISE.
static float myrand(unsigned long *seed);

// Supplied by the host environment; declared by hand here instead of
// through a header.
extern void *echo_malloc(int size);
extern void echo_free(void *mem);
extern int printk(const char * fmt, ...);
extern double fabs(double x);


// NOTE(review): neither flag is read or written anywhere else in the
// visible source.
static int has_sse=0;
static int has_3dnow=0;

/*--------------------------------------------------------------------------*\

	FUNCTION.: echo_open()
	AUTHOR...: David Rowe
	DATE.....: 23/9/02

	Opens an echo canceller, and initialises the state variables for that
	echo canceller.

\*--------------------------------------------------------------------------*/

int echo_open(void **pv) {
	ECHO_SV *state = (ECHO_SV*)echo_malloc((int)sizeof(ECHO_SV));
	int  tap;

	// Allocation failed: report it; *pv and float_dotprod are left untouched.
	if (!state) {
		return 1;
	}

	// Clear the fixed (a), adaptive (a2) and snapshot (a3) coefficient
	// sets together with the memLy history buffer.
	tap = 0;
	while (tap < ECHO_TAPS) {
		state->a[tap] = 0.0;
		state->a2[tap] = 0.0;
		state->a3[tap] = 0.0;
		state->memLy[tap] = 0.0;
		tap++;
	}

	// All short term level estimates start at the adaption cutoff level.
	state->Ly = CUTOFF;
	state->Ls = CUTOFF;
	state->Le = CUTOFF;
	state->Le2 = CUTOFF;
	state->Le3 = CUTOFF;

	// Running far end energy and the sample that leaves its window next.
	state->Ey = 1.0;
	state->y_oldest = 0.0;

	// Control state: adaption enabled, window counter and flags cleared.
	state->adapt = 1;
	state->beta = BETA;
	state->ok_count = 0;
	state->lyi = 0;
	state->hang = 0;
	state->sup = 0;
	state->dt = 0;
	state->seed = 1000;

	// Hand the initialised state back to the caller as an opaque pointer.
	*pv = (void*)state;

	// NOTE(review): float_dotprod is a single file-scope pointer, so every
	// successful open switches it back to the plain C routine, replacing
	// any earlier echo_set_opt() choice.
	float_dotprod = float_dotprod_vanilla;

	return 0;
}
/*--------------------------------------------------------------------------*\

	FUNCTION.: echo_set_opt()
	AUTHOR...: Ben Kramer
	DATE.....: 24/06/03

	Sets which code to use for the float_dotprod

\*--------------------------------------------------------------------------*/

void echo_set_opt(int method) {
	// method 0 selects the plain C routine, method 1 the 3DNow! routine.
	// Any other value (including 2, reserved for an SSE routine that is
	// not supported yet) leaves the current selection unchanged.
	if (method == 0) {
		float_dotprod = float_dotprod_vanilla;
	}
	else if (method == 1) {
		float_dotprod = float_dotprod_3dnow;
	}
}
/*--------------------------------------------------------------------------*\

	FUNCTION.: echo_close()
	AUTHOR...: David Rowe
	DATE.....: 23/9/02

	Frees any memory used by the echo canceller.

\*--------------------------------------------------------------------------*/

void echo_close(void *pv) {
	// pv is passed straight to echo_free(); it is expected to be the
	// pointer that echo_open() stored in *pv (obtained from echo_malloc()).
	// Whether echo_free() accepts a null pointer is not visible from here.
	echo_free(pv);
}

/*--------------------------------------------------------------------------*\

	FUNCTION.: echo()
	AUTHOR...: David Rowe
	DATE.....: 23/9/02

	This function performs the actual echo cancellation.

\*--------------------------------------------------------------------------*/

void echo(void *pv,   // ptr to state variables (cast to ECHO_SV* below)
	  float e[],  // output: n samples, limited to +/-32767 or zeroed by the suppressor
	  float y[],  // input (far end); indices -(ECHO_TAPS-1) .. n-1 are read
	  float s[],  // reference (near end) (NOTE: not same array as e!)
	  int n       // number of samples
) {
	ECHO_SV *pe = (ECHO_SV*)pv;
	// echo estimate for the current sample (this local shadows the
	// function name inside the body)
	float echo;
	//#define COMF_NOISE
	#ifdef COMF_NOISE
	float noise;
	#endif
	#ifdef NOT_USED
	float maxLy;
	float *memLy = pe->memLy;
	#endif
	int   i,k;
	// a, a2 and a3 point into the state block, so coefficient updates
	// below modify the state directly.  The scalar state is copied into
	// locals here and written back after the sample loop.
	float *a = pe->a;	// fixed path coeffs, these produce e[]
	float *a2 = pe->a2;	// adaptive path coeffs
	float *a3 = pe->a3;	// snapshot of a2 taken half way through WINDOW
	float Ly = pe->Ly;
	float Le = pe->Le;
	float Le2 = pe->Le2;
	float Le3 = pe->Le3;
	float Ls = pe->Ls;
	int   lyi = pe->lyi;
	int   hang = pe->hang;
	float Ey = pe->Ey;
	int   adapt = pe->adapt;	// read only here; set via echo_adapt()
	float e2;			// residual of the adaptive path
	float beta = pe->beta;		// read only here
	int ok_count = pe->ok_count;
	float x;
	float y_oldest = pe->y_oldest;
        float tmp;

	// per-call flags: sup is set below if any sample gets suppressed,
	// dt is only ever set by the NOT_USED double talk detector
	pe->sup = 0;
	pe->dt = 0;

	for(i=0; i<n; i++) {
		// update short term pwr estimates (the +1 keeps them
		// above zero, Ly is later used as a divisor)

		Ly = (1.0-ALPHA)*Ly + ALPHA*(fabs(y[i])+1);
		Ls = (1.0-ALPHA)*Ls + ALPHA*(fabs(s[i])+1);

		#ifdef NOT_USED
		// Geigel double talk detector (just used for 
		// suppressor, not adaptation control)
		// NOTE(review): uses the literal 0.71 rather than MINUS3DB

		memLy[lyi] = Ly;
		maxLy = 0.0;
		for(k=0; k<ECHO_TAPS; k++)
			if (memLy[k] > maxLy)
				maxLy = memLy[k];

		lyi++;
		if (lyi == ECHO_TAPS) lyi = 0;		
				
		if (Ls > 0.71*maxLy) {
			hang = HANGT;
		}

		if (hang) {
			hang--;
			pe->dt = 1;
		}
		#endif
		
		// estimate echo with the fixed coeffs over the window
		// y[i-ECHO_TAPS+1] .. y[i].  The dot product is handed
		// ECHO_TAPS/4 as its length; float_dotprod_vanilla()
		// multiplies that count by 4 again.

		//#define DR_OLD
		echo = 0.0;
		#ifdef DR_OLD
		for(k=0; k<ECHO_TAPS; k++) {
		   echo += a[k]*y[i-ECHO_TAPS+1+k];
		}
		#else
		//echo = float_dotprod_3dnow(a, &y[i-ECHO_TAPS+1], ECHO_TAPS/4);
		echo = (*float_dotprod)(a, &y[i-ECHO_TAPS+1], ECHO_TAPS/4);
		#endif
		// output sample: near end minus fixed path estimate,
		// limited to +/-32767
		tmp = s[i] - echo;
		if (tmp > 32767) tmp = 32767;
		if (tmp < -32767) tmp = -32767;
		e[i] = tmp;

		// same estimate again, this time with the adaptive coeffs
		echo = 0.0;
		#ifdef DR_OLD
		for(k=0; k<ECHO_TAPS; k++) {
			echo += a2[k]*y[i-ECHO_TAPS+1+k];
		}
		#else
		//echo = float_dotprod_3dnow(a2, &y[i-ECHO_TAPS+1], ECHO_TAPS/4);
		echo = (*float_dotprod)(a2, &y[i-ECHO_TAPS+1], ECHO_TAPS/4);
		#endif
		e2 = s[i] - echo;
		
		if (e2 >32767) e2 = 32767;
		if (e2 <-32767) e2 = -32767;

		// update echo pwr short term ests, note these float
		// around and are not frozen during doubletalk

		Le2 = (1.0-ALPHA)*Le2 + ALPHA*(fabs(e2)+1);
		Le = (1.0-ALPHA)*Le + ALPHA*(fabs(e[i])+1);
	
		// update energy in filter est: add the newest sample,
		// drop the one remembered from the previous iteration,
		// then remember the sample that leaves the window next
		Ey = Ey + y[i]*y[i] - y_oldest*y_oldest;
		y_oldest = y[i-ECHO_TAPS+1];

		// update filter if:
		// Ls (ref pwr) is at least 3dB less than Ly (input pwr)
		// and Ly > certain minimum
		// and adaption is enabled (see echo_adapt())

		if ((Ls < Ly*MINUS3DB) && (Ly > CUTOFF) && (adapt)) {

			// if adap error est is at least 3dB better than
			// fixed, see if it stays that way for WINDOW
			// samples

			if (Le2 < MINUS3DB*Le) {
				ok_count++;

				// half way thru, take snap shot of adap coeffs
				if (ok_count == WINDOW/2) {
				  for(k=0; k<ECHO_TAPS; k++)
					a3[k] = a2[k];
				  Le3 = Le2;
				}

				// we've made it - update fixed with the
				// snapshot (a3/Le3), not the current a2
				if (ok_count == WINDOW) {
				  for(k=0; k<ECHO_TAPS; k++)
					a[k] = a3[k];
				  Le = Le3;
				  ok_count = 0;
				}
			}
			else {
				// didn't make it for the whole window, reset
				ok_count = 0;
			}

			// adaption
			#ifdef DR_OLD
			Ey = 1.0;
			for(k=0; k<ECHO_TAPS; k++)
				Ey += y[i-k]*y[i-k];
			#endif
			
			// step for this sample: adaptive path error
			// scaled by 2*beta and divided by the running
			// far end energy Ey
		        x = 2*beta*e2/Ey;

			#ifdef DR_OLD
			for(k=0; k<ECHO_TAPS; k++) {
			  a2[k] += x*y[i-ECHO_TAPS+1+k];
			}
			#endif
			// NOTE(review): with DR_OLD defined, the loop above
			// and this call would both apply the update
			update_loop(a2,x,&y[i-ECHO_TAPS+1],ECHO_TAPS);
		}
		else {
			// window must be continuous 
			ok_count = 0;
		}

		// noise not used, but maybe later for comfort noise if
		// we get a reliable estimate for bg noise level (Le is
		// not suitable as it doesn't freeze during doubletalk)
		#ifdef COMF_NOISE
		//noise = 2.0*((float)rand()/RAND_MAX) - 1.0;
		noise = 2.0*myrand(&pe->seed) - 1;
		noise *= Le;
		#endif
		// simple suppressor
		// hang is forced to 0 here, so the test below is always
		// true and any hangover set by the NOT_USED code above
		// is discarded
		hang = 0;
		
		if (hang ==0) {
			// residual level is below THRESHOLD times the far
			// end level: replace the output sample
			if (Le/Ly < THRESHOLD) {
				#ifdef COMF_NOISE
				e[i] = noise;
				#else
				e[i] = 0;
				#endif
				pe->sup = 1;
//				printk("EC: suspresing...\n");
			}
		}		
		
	}

	// write the scalar state back for the next call (adapt and beta
	// are not modified in this function, so they are not stored)
	pe->Ly = Ly;
	pe->Ls = Ls;
	pe->Le = Le;
	pe->Le2 = Le2;
	pe->Le3 = Le3;
	pe->hang = hang;
	pe->lyi = lyi;
	pe->ok_count = ok_count;
	pe->Ey = Ey;
	pe->y_oldest = y_oldest;
}

// Plain C dot product.  n is the vector length counted in groups of four
// elements: the first n*4 products a[k]*y[k] are summed in index order,
// accumulating in a float.  Returns 0.0 when n is 0.
static float float_dotprod_vanilla(const float a[],const float y[],unsigned int n) {
	const unsigned int len = n*4;
	unsigned int idx = 0;
	float acc = 0.0;

	while (idx < len) {
		acc += a[idx]*y[idx];
		idx++;
	}

	return acc;
}

// Coefficient update: a2[k] += x*y[k] for k = 0 .. n-1.
//
//   a2  coefficients, updated in place
//   x   step applied to every tap
//   y   input samples, y[0] pairs with a2[0]
//   n   number of taps to update; nothing is touched when n <= 0
//
// The length now comes from n instead of a hard-coded ECHO_TAPS, so the
// function does what its signature promises.  The caller in echo() passes
// ECHO_TAPS, so its behaviour is unchanged.
static void update_loop(float a2[], float x, float y[], int n) {
  int k;

  for(k=0; k<n; k++) {
    a2[k] += x*y[k];
  }
}

// Pointer-walking variant of update_loop(): adds x * y[k] to a2[k] for
// k = 0 .. n-1.  Despite the name the loop is not manually unrolled.
//
//   a2  coefficients, updated in place
//   x   step applied to every tap
//   y   input samples, y[0] pairs with a2[0]
//   n   number of taps to update; nothing is touched when n <= 0
//
// The length now comes from n instead of a hard-coded ECHO_TAPS.
static void update_loop_unrolled(float a2[], float x, float y[], int n) {
  int k;

  for(k=0; k<n; k++, a2++,y++) {
    *a2 = *a2 + x * *y;
  }
}

// Multiplier, increment and modulus of the generator below.
// NOTE(review): these are writable globals with external linkage and very
// short names; they are left that way here in case another file refers
// to them.
unsigned long A = 9301;
unsigned long C = 49297;
unsigned long M = 233280;

// Advances *seed to (*seed * A + C) % M and returns the new seed divided
// by M, i.e. a value in [0, 1).
static float myrand(unsigned long *seed) {
      unsigned long next = (*seed * A + C) % M;

      *seed = next;
      return ((float) next / (float)M);
}
  
/*--------------------------------------------------------------------------*\

  Access functions used for development and simulation.

\*--------------------------------------------------------------------------*/

void echo_adapt(void *pv, int adapt) {
	// pv is treated as the ECHO_SV state block.  The stored flag is one
	// of the conditions echo() tests before updating the adaptive
	// coefficients: non-zero allows adaption, zero blocks it.
	((ECHO_SV*)pv)->adapt = adapt;
}

