/*
karlin.c
Authors:  Stephen Altschul and Warren Gish
*/
/*
Comment from original blast programs:
**************** Statistical Significance Parameter Subroutine ****************

    Version 1.0     February 2, 1990
    Version 1.2     July 6,     1990

    Program by:     Stephen Altschul

    Address:        National Center for Biotechnology Information
                    National Library of Medicine
                    National Institutes of Health
                    Bethesda, MD  20894

    Internet:       altschul@ncbi.nlm.nih.gov

See:  Karlin, S. & Altschul, S.F. "Methods for Assessing the Statistical
    Significance of Molecular Sequence Features by Using General Scoring
    Schemes,"  Proc. Natl. Acad. Sci. USA 87 (1990), 2264-2268.

    Computes the parameters lambda and K for use in calculating the
    statistical significance of high-scoring segments or subalignments.

    The scoring scheme must be integer valued.  A positive score must be
    possible, but the expected (mean) score must be negative.

    A program that calls this routine must provide the value of the lowest
    possible score, the value of the greatest possible score, and a pointer
    to an array of probabilities for the occurence of all scores between
    these two extreme scores.  For example, if score -2 occurs with
    probability 0.7, score 0 occurs with probability 0.1, and score 3
    occurs with probability 0.2, then the subroutine must be called with
    low = -2, high = 3, and pr pointing to the array of values
    { 0.7, 0.0, 0.1, 0.0, 0.0, 0.2 }.  The calling program must also provide
    pointers to lambda and K; the subroutine will then calculate the values
    of these two parameters.  In this example, lambda=0.330 and K=0.154.

    The parameters lambda and K can be used as follows.  Suppose we are
    given a length N random sequence of independent letters.  Associated
    with each letter is a score, and the probabilities of the letters
    determine the probability for each score.  Let S be the aggregate score
    of the highest scoring contiguous segment of this sequence.  Then if N
    is sufficiently large (greater than 100), the following bound on the
    probability that S is greater than or equal to x applies:

            P( S >= x )   <=   1 - exp [ - KN exp ( - lambda * x ) ].

    In other words, the p-value for this segment can be written as
    1-exp[-KN*exp(-lambda*S)].

    This formula can be applied to pairwise sequence comparison by assigning
    scores to pairs of letters (e.g. amino acids), and by replacing N in the
    formula with N*M, where N and M are the lengths of the two sequences
    being compared.

    In addition, letting y = KN*exp(-lambda*S), the p-value for finding m
    distinct segments all with score >= S is given by:

                           2             m-1           -y
            1 - [ 1 + y + y /2! + ... + y   /(m-1)! ] e

    Notice that for m=1 this formula reduces to 1-exp(-y), which is the same
    as the previous formula.

*******************************************************************************/
#include <ncbi.h>
#include <blast/blast.h>


BLAST_Error
BlastKarlinBlkCalc(kbp, sfp)
	BLAST_KarlinBlkPtr	kbp;
	BLAST_ScoreFreqPtr	sfp;
{
	/* Calculate the parameter Lambda */

	kbp->Lambda_real = kbp->Lambda = BlastKarlinLambdaNR(sfp);
	if (kbp->Lambda < 0.)
		goto ErrExit;


	/* Calculate H */

	kbp->H_real = kbp->H = BlastKarlinLtoH(sfp, kbp->Lambda);
	if (kbp->H < 0.)
		goto ErrExit;


	/* Calculate K and log(K) */

	kbp->K_real = kbp->K = BlastKarlinLHtoK(sfp, kbp->Lambda, kbp->H);
	if (kbp->K < 0.)
		goto ErrExit;
	kbp->logK_real = kbp->logK = log(kbp->K);

	/* Normal return */
	return BLAST_ERR_NONE;

ErrExit:
	kbp->Lambda = kbp->H = kbp->K = kbp->logK_real = kbp->logK
		= kbp->Lambda_real = kbp->H_real = kbp->K_real = -1.;
	return blast_errno;
}

#define DIMOFP0(iter,range)	(iter*range + 1)
#define DIMOFP0_MAX DIMOFP0(BLAST_KARLIN_K_ITER_MAX,BLAST_SCORE_RANGE_MAX)

double
BlastKarlinLHtoK(sfp, lambda, H)
	BLAST_ScoreFreqPtr	sfp;
	double	lambda, H;
{
#ifndef BLAST_KARLIN_STACKP
	double	PNTR P0 = NULL;
#else
	double	P0 [DIMOFP0_MAX];
#endif
	BLAST_Score	low;	/* Lowest score (must be negative) */
	BLAST_Score	high;	/* Highest score (must be positive) */
	double	K;			/* local copy of K */
	double	ratio;
	int		i, j;
	BLAST_Score	range, lo, hi, first, last;
	register double	sum;
	double	Sum, av, oldsum, oldsum2;
	int		iter;
	double	sumlimit, x;
	double	PNTR p, PNTR ptrP, PNTR ptr1, PNTR ptr2, PNTR ptr1e;
	double	etolami, etolam;

	if (lambda <= 0. || H <= 0.) {
		blast_errno = BLAST_ERR_DOMAIN;
		return -1.;
	}

	if (sfp->score_avg >= 0.0) {
		blast_errno = BLAST_ERR_SCORE_AVG;
		return -1.;
	}

	low = sfp->obs_min;
	high = sfp->obs_max;

	if (low == -1 || high == 1) {
		if (low == -1 && high == 1) {
			x = (sfp->sprob[low] - sfp->sprob[high]);
			K = x * x / sfp->sprob[low];
			return K;
		}
		av = H / lambda;
		if (high == 1)
			K = av;
		else
			K = (sfp->score_avg * sfp->score_avg) / av;
		K *= -Nlm_Expm1(-lambda);
		return K;
	}
	if (high == -low) {
		/* see if there is only one positive score observed, high */
		for (first = 1; first < high; ++first) {
			if (sfp->sprob[first] > 0.)
				break;
		}
		if (first == high) {
			av = H / (lambda *= high);
			K = av * -Nlm_Expm1(-lambda);
			return K;
		}
	}
#if 0
	/* see if there is only one negative score observed, low */
	for (first = -1; first > low; --first) {
		if (sfp->sprob[first] > 0.)
			break;
	}
	/* the following code block is wrong--the computation doesn't scale!? */
	if (first == low) {
		av = H / (lambda * low);
		x = sfp->score_avg / low;
		x = (x * x) / av;
		K = x * Nlm_Expm1(lambda * low);
		return K;
	}
#endif

	sumlimit = blast_config->K_sumlimit;
	if (sumlimit > 0.1 || sumlimit < 1.e-6)
		sumlimit = BLAST_KARLIN_K_SUMLIMIT_DEFAULT;

	range = high - low;
	iter = blast_config->K_iter;
	iter = MIN(iter, BLAST_KARLIN_K_ITER_MAX);

	if (DIMOFP0(iter,range) > DIMOFP0_MAX) {
		blast_errno = BLAST_ERR_RANGE;
		return -1.;
	}
#ifndef BLAST_KARLIN_STACKP
	P0 = (double PNTR)BlastCalloc(DIMOFP0(iter,range) * sizeof(*P0));
	if (P0 == NULL)
		return -1.;
#else
	Nlm_MemSet((CharPtr)P0, 0, DIMOFP0(iter,range)*sizeof(P0[0]));
#endif

	av = H / lambda;
	etolam = exp((double)lambda);
	Sum = 0.;
	lo = hi = 0;
	p = &sfp->sprob[low];
	P0[0] = sum = oldsum = oldsum2 = 1.;
    for (j = 0; j < iter && sum > sumlimit; Sum += sum /= ++j) {
        first = last = range;
		lo += low;
		hi += high;
        for (ptrP = P0+(hi-lo); ptrP >= P0; *ptrP-- =sum) {
            ptr1 = ptrP - first;
            ptr1e = ptrP - last;
            ptr2 = p + first;
            for (sum = 0.; ptr1 >= ptr1e; )
                sum += *ptr1--  *  *ptr2++;
            if (first)
                --first;
            if (ptrP - P0 <= range)
                --last;
        }
		etolami = Nlm_Powi((double)etolam, lo - 1);
        for (sum = 0., i = lo; i != 0; ++i) {
			etolami *= etolam;
            sum += *++ptrP * etolami;
		}
        for (; i <= hi; ++i)
            sum += *++ptrP;
		oldsum2 = oldsum;
		oldsum = sum;
    }

	/* Terms of geometric progression added for correction */
	ratio = oldsum / oldsum2;
	if (ratio >= (1.0 - sumlimit*0.001)) {
		blast_errno = BLAST_ERR_SCORE_AVG;
		K = -1.;
		goto CleanUp;
	}
	sumlimit *= 0.01;
	while (sum > sumlimit) {
		oldsum *= ratio;
		Sum += sum = oldsum / ++j;
	}

    for (i = 1, j = -low; i <= range && j > 1; ++i)
        if (p[i])
            j = Nlm_Gcd(j, i);

	if (j*etolam > 0.05) {
		etolami = Nlm_Powi((double)etolam, -j);
    	K = j*exp((double)-2.0*Sum) / (av*(1.0 - etolami));
	}
	else
	    K = -j*exp((double)-2.0*Sum) / (av*Nlm_Expm1(-j*(double)lambda));

CleanUp:
#ifndef BLAST_KARLIN_K_STACKP
	if (P0 != NULL)
		BlastFree((Nlm_VoidPtr)P0);
#endif
	return K;
}

/*
	BlastKarlinLambdaBis

	Calculate Lambda using the bisection method (slow).
*/
double
BlastKarlinLambdaBis(sfp)
	BLAST_ScoreFreqPtr	sfp;
{
	register double	PNTR sprob;
	double	lambda, up, newval;
	BLAST_Score	i, low, high;
	int		j;
	register double	sum, x0, x1;

	if (sfp->score_avg >= 0.) {
		blast_errno = BLAST_ERR_SCORE_AVG;
		return -1.;
	}
	low = sfp->obs_min;
	high = sfp->obs_max;
	if (BlastScoreChk(low, high) != BLAST_ERR_NONE)
		return -1.;

	sprob = sfp->sprob;
	up = blast_config->Lambda0;
	for (lambda=0.; ; ) {
		up *= 2;
		x0 = exp((double)up);
		x1 = Nlm_Powi((double)x0, low - 1);
		if (x1 > 0.) {
			for (sum=0., i=low; i<=high; ++i)
				sum += sprob[i] * (x1 *= x0);
		}
		else {
			for (sum=0., i=low; i<=high; ++i)
				sum += sprob[i] * exp(up * i);
		}
		if (sum >= 1.0)
			break;
		lambda = up;
	}

	for (j=0; j<blast_config->Lambda_iter; ++j) {
		newval = (lambda + up) / 2.;
		x0 = exp((double)newval);
		x1 = Nlm_Powi((double)x0, low - 1);
		if (x1 > 0.) {
			for (sum=0., i=low; i<=high; ++i)
				sum += sprob[i] * (x1 *= x0);
		}
		else {
			for (sum=0., i=low; i<=high; ++i)
				sum += sprob[i] * exp(newval * i);
		}
		if (sum > 1.0)
			up = newval;
		else
			lambda = newval;
	}
	return (lambda + up) / 2.;
}

/******************* Fast Lambda Calculation Subroutine ************************
	Version 1.0	May 16, 1991
	Program by:	Stephen Altschul

	Uses Newton-Raphson method (fast) to solve for Lambda, given an initial
	guess (lambda0) obtained perhaps by the bisection method.
*******************************************************************************/

double
BlastKarlinLambdaNR(sfp)
	BLAST_ScoreFreqPtr	sfp;
{
	BLAST_Score	low;			/* Lowest score (must be negative)  */
	BLAST_Score	high;			/* Highest score (must be positive) */
	int		j;
	BLAST_Score	i;
	double PNTR	sprob;
	double	lambda0, sum, slope, temp, x0, x1, amt;

	low = sfp->obs_min;
	high = sfp->obs_max;
	if (sfp->score_avg >= 0.) {	/* Expected score must be negative */
		blast_errno = BLAST_ERR_SCORE_AVG;
		return -1.0;
	}
	if (BlastScoreChk(low, high) != BLAST_ERR_NONE)
		return -1.;

	/* Calculate lambda */

	/* special case, see Karlin & Altschul, PNAS 87:2268 */
	if (sfp->obs_min == -1 && sfp->obs_max == 1)
		return log(sfp->sprob[sfp->obs_min] / sfp->sprob[sfp->obs_max]);
	if (-sfp->obs_min == sfp->obs_max) {
		/* see if there are no other scores besides obs_min, obs_max, and 0 */
		for (i = sfp->obs_min + 1; i < sfp->obs_max; ++i) {
			if (i == 0)
				continue;
			if (sfp->sprob[i] > 0.)
				break;
		}
		if (i == sfp->obs_max) {
			temp = log(sfp->sprob[sfp->obs_min] / sfp->sprob[sfp->obs_max]);
			return temp / sfp->obs_max;
		}
	}

	lambda0 = blast_config->Lambda0;
	sprob = sfp->sprob;
	for (j=0; j<20; ++j) { /* limit of 20 should never be close-approached */
		sum = -1.0;
		slope = 0.0;
		if (lambda0 < 0.01)
			break;
		x0 = exp((double)lambda0);
		x1 = Nlm_Powi((double)x0, low - 1);
		if (x1 == 0.)
			break;
		for (i=low; i<=high; ++i) {
			sum += (temp = sprob[i] * (x1 *= x0));
			slope += temp * i;
		}
		lambda0 -= (amt = sum/slope);
		if (ABS(amt/lambda0) < blast_config->Lambda_accuracy) {
			/*
			Does it appear that we may be on the verge of converging
			to the ever-present, but uninteresting, zero-valued solution?
			*/
			if (lambda0 > blast_config->Lambda_accuracy)
				return lambda0;
			break;
		}
	}
	return BlastKarlinLambdaBis(sfp);
}

double
BlastKarlinLambda0(lambda0)
	double	lambda0;
{
	double	x;

	if (lambda0 < 0.01 || lambda0 > 5.) {
		blast_errno = BLAST_ERR_DOMAIN;
		return -1.;
	}
	x = blast_config->Lambda0;
	blast_config->Lambda0 = lambda0;
	return x;
}

double
BlastKarlinLambdaAccuracy(digits)
	double	digits; /* desired no. of decimal digits of accuracy */
{
	double	x;
	int		i;

	if (digits < 1.) {
		blast_errno = BLAST_ERR_DOMAIN;
		return -1.;
	}
#ifdef DBL_DIG
	digits = MIN(digits, DBL_DIG);
#else
	digits = MIN(digits, 10);
#endif

	i = (x = digits*(NCBIMATH_LN10/NCBIMATH_LN2));
	if (i != x)
		++i;
	blast_config->Lambda_iter = i;
	x = blast_config->Lambda_accuracy;
	blast_config->Lambda_accuracy = EXP10(-digits);
	return x;
}

/*
	BlastKarlinLtoH

	Calculate H, the relative entropy of the p's and q's
*/
double
BlastKarlinLtoH(sfp, lambda)
	BLAST_ScoreFreqPtr	sfp;
	double	lambda;
{
	register BLAST_Score	i;
	register double	av, etolam, etolami;

	if (lambda < 0.) {
		blast_errno = BLAST_ERR_DOMAIN;
		return -1.;
	}
	if (BlastScoreChk(sfp->obs_min, sfp->obs_max) != BLAST_ERR_NONE)
		return -1.;

	etolam = exp((double)lambda);
	etolami = Nlm_Powi((double)etolam, sfp->obs_min - 1);
	if (etolami > 0.) {
	    for (av=0., i=sfp->obs_min; i<=sfp->obs_max; ++i)
   			av += sfp->sprob[i] * i * (etolami *= etolam);
	}
	else {
	    for (av=0., i=sfp->obs_min; i<=sfp->obs_max; ++i)
   			av += sfp->sprob[i] * i * exp(lambda * i);
	}
    return lambda * av;
}

/*
	BlastKarlinStoLen()

	Given a score, return the length expected for an HSP of that score
*/
double
BlastKarlinStoLen(kbp, S)
	BLAST_KarlinBlkPtr	kbp;
	BLAST_Score	S;
{
	return kbp->Lambda * S / kbp->H;
}

/*
	BlastKarlinEtoS() -- given an Expect value, return the associated cutoff score

	Error return value is BLAST_SCORE_MIN
*/
BLAST_Score
BlastKarlinEtoS(E, kbp, qlen, dblen)
	double	E;	/* Expect value */
	BLAST_KarlinBlkPtr	kbp;
	double	qlen;	/* length of query sequence */
	double	dblen;	/* length of database */
{
	double	Lambda, K, H; /* parameters for Karlin statistics */
	BLAST_Score	S;
	double	etolam, xlen, f, g, x, prod1, prod2;
	long	lo, hi;

	Lambda = kbp->Lambda;
	K = kbp->K;
	H = kbp->H;
	if (Lambda < 0. || K < 0. || H < 0.) {
		blast_errno = BLAST_ERR_INVAL;
		return BLAST_SCORE_MIN;
	}

	S = ceil( log((double)(K * qlen * dblen / E)) / Lambda );

	if (blast_config->refined_stats) {
		/* Binary search to find appropriate cutoff */
		etolam = exp(-Lambda);
		hi = S;
		lo = S = 1;
		while (lo < hi) {
			S = lo + (hi - lo)/2;
			x = K * Nlm_Powi((double)etolam, S);
			xlen = BlastKarlinStoLen(kbp, S);
			f = MAX(qlen - xlen, 1.);
			g = MAX(dblen - xlen, 1.);
			prod1 = x*f*g;
			x *= etolam;
			xlen = BlastKarlinStoLen(kbp, S+1);
			f = MAX(qlen - xlen, 1.);
			g = MAX(dblen - xlen, 1.);
			prod2 = x*f*g;
			if (prod1 <= E) {
				if (prod1 == E)
					break;
				hi = S;
			}
			else {
				lo = S+1;
				if (prod2 <= E) {
					S = lo;
					break;
				}
			}
		}
	} /* refined_stats */

	return S;
}

/*
	BlastKarlinStoE() -- given a score, return the associated Expect value

	Error return value is -1.
*/
double
BlastKarlinStoE(S, kbp, qlen, dblen)
	BLAST_Score	S;
	BLAST_KarlinBlkPtr	kbp;
	register double	qlen;	/* length of query sequence */
	register double	dblen;	/* length of database */
{
	register double	Lambda, K, H; /* parameters for Karlin statistics */
	register double	elen;

	Lambda = kbp->Lambda;
	K = kbp->K;
	H = kbp->H;
	if (Lambda < 0. || K < 0. || H < 0.) {
		blast_errno = BLAST_ERR_INVAL;
		return -1.;
	}

	if (blast_config->refined_stats) {
		elen = BlastKarlinStoLen(kbp, S);
		qlen = MAX(1., qlen - elen);
		dblen = MAX(1., dblen - elen);
	}

	return qlen * dblen * exp((double)(-Lambda * S) + kbp->logK);
}

/*
BlastKarlinEtoP -- convert an Expect value to a P-value
*/
double
BlastKarlinEtoP(x)
	double	x;
{
	return -Nlm_Expm1((double)-x);
}

/*
BlastKarlinEtoLogP -- convert an Expect value to a log(P-value)
*/
double
BlastKarlinEtoLogP(x)
	double	x;
{
	if (x < 0.) {
		blast_errno = BLAST_ERR_DOMAIN;
		return -HUGE_VAL;
	}
	if (x <= (10000.*DBL_EPSILON))
		return log(x);
#ifdef DBL_DIG
	if (x >= (DBL_DIG * NCBIMATH_LN10 - 7))
#else
	if (x >= (10 * NCBIMATH_LN10 - 7))
#endif
		return -x;
	return Nlm_Log1p(-exp(-x));
}

/*
BlastKarlinPtoE -- convert a P-value to an Expect value
*/
double
BlastKarlinPtoE(p)
	double	p;
{
	if (p < 0. || p > 1.0) {
		blast_errno = BLAST_ERR_DOMAIN;
		return -HUGE_VAL;
	}
	return -Nlm_Log1p(-p);
}

/*
BlastEtoPoissonP -- convert an Expect value to a Poisson P-value
*/

double
BlastEtoPoissonP(cnt, x)
	unsigned    cnt;
	register double	x;
{
	int		j;
	register unsigned	i;
	register unsigned	imin;
	register double		sum, prod, xi, tail;
	double	accuracy;

	if (cnt < 2) {
		if (cnt == 1)
			return -Nlm_Expm1(-x);
		return 1.;
	}

	j = ceil(x);
	imin = MAX(j, 1) - 1;

	sum = prod = Nlm_Powi(x, cnt) / Nlm_Factorial(cnt);
	/* Using tail approximation suggested by Phil Green */
	accuracy = blast_config->poissonp_accuracy;
	for (xi = i = cnt+1; i <= imin || (tail = prod/(1.-x/xi)) > accuracy*sum; ++i, ++xi) {
		prod *= (x/xi);
		sum += prod;
	}

	sum = exp(-x) * (sum + tail);
	return MIN(sum, 1.);
}

double
BlastGapDecay(pvalue, nsegs, decayrate)
	double	pvalue;
	unsigned	nsegs;
	register double	decayrate;
{
	if (decayrate <= 0. || decayrate >= 1. || nsegs == 0)
		return pvalue;

	return pvalue / ((1. - decayrate) * Nlm_Powi(decayrate, nsegs - 1));
}

double
BlastGapDecayInverse(pvalue, nsegs, decayrate)
	double	pvalue;
	unsigned	nsegs;
	register double	decayrate;
{
	if (decayrate <= 0. || decayrate >= 1. || nsegs == 0)
		return pvalue;

	return pvalue * (1. - decayrate) * Nlm_Powi(decayrate, nsegs - 1);
}



static float	tab2[] = { /* table for r == 2 */
0.01669,  0.0249,   0.03683,  0.05390,  0.07794,  0.1111,   0.1559,   0.2146,   
0.2890,   0.3794,   0.4836,   0.5965,   0.7092,   0.8114,   0.8931,   0.9490,   
0.9806,   0.9944,   0.9989
		};

static float	tab3[] = { /* table for r == 3 */
0.9806,   0.9944,   0.9989,   0.0001682,0.0002542,0.0003829,0.0005745,0.0008587,
0.001278, 0.001893, 0.002789, 0.004088, 0.005958, 0.008627, 0.01240,  0.01770,  
0.02505,  0.03514,  0.04880,  0.06704,  0.09103,  0.1220,   0.1612,   0.2097,   
0.2682,   0.3368,   0.4145,   0.4994,   0.5881,   0.6765,   0.7596,   0.8326,   
0.8922,   0.9367,   0.9667,   0.9846,   0.9939,   0.9980
		};

static float	tab4[] = { /* table for r == 4 */
2.658e-07,4.064e-07,6.203e-07,9.450e-07,1.437e-06,2.181e-06,3.302e-06,4.990e-06,
7.524e-06,1.132e-05,1.698e-05,2.541e-05,3.791e-05,5.641e-05,8.368e-05,0.0001237,
0.0001823,0.0002677,0.0003915,0.0005704,0.0008275,0.001195, 0.001718, 0.002457,
0.003494, 0.004942, 0.006948, 0.009702, 0.01346,  0.01853,  0.02532,  0.03431,
0.04607,  0.06128,  0.08068,  0.1051,   0.1352,   0.1719,   0.2157,   0.2669,
0.3254,   0.3906,   0.4612,   0.5355,   0.6110,   0.6849,   0.7544,   0.8168,
0.8699,   0.9127,   0.9451,   0.9679,   0.9827,   0.9915,   0.9963
		};

static float PNTR table[] = { tab2, tab3, tab4 };
static short tabsize[] = { DIM(tab2)-1, DIM(tab3)-1, DIM(tab4)-1 };

static double	f PROTO((double,Nlm_VoidPtr)), g PROTO((double,Nlm_VoidPtr));

double
BlastSumP(r, s)
	int	r;
	double	s;
{
	if (blast_config->consistency)
		return blast_config->sump(r, s + Nlm_LnGammaInt(r + 1));
	return blast_config->sump(r, s);
}

/*
    Estimate the Sum P-value by calculation or interpolation, as appropriate.
	Approx. 2-1/2 digits accuracy minimum throughout the range of r, s.
	r = number of segments
	s = total score (in nats), adjusted by -r*log(KN)
*/
double
BlastSumPStd(r, s)
	int		r;
	double	s;
{
	int		i, r1, r2;
	double	a;

	if (r == 1)
		return -Nlm_Expm1(-exp(-s));

	if (r <= 4) {
		if (r < 1)
			return 0.;
		r1 = r - 1;
		if (s >= r*r+r1) {
			a = Nlm_LnGammaInt(r+1);
			return r * exp(r1*log(s)-s-a-a);
		}
		if (s > -2*r) {
			/* interpolate */
			i = a = s+s+(4*r);
			a -= i;
			i = tabsize[r2 = r - 2] - i;
			return a*table[r2][i-1] + (1.-a)*table[r2][i];
		}
		return 1.;
	}

	return BlastSumPCalc(r, s);
}

/*
    BlastSumPCalc

    Evaluate the following double integral, where r = number of segments
    and s = the adjusted score in nats:

                    (r-2)         oo           oo
     Prob(r,s) =   r              -            -   (r-2)
                 -------------   |   exp(-y)  |   x   exp(-exp(x - y/r)) dx dy
                 (r-1)! (r-2)!  U            U
                                s            0
*/
double
BlastSumPCalc(r, s)
	int		r;
	double	s;
{
	int		r1, itmin;
	double	t, d;
	double	est_mean, mean, stddev, stddev4;
	double	xr, xr1, xr2, logr;
	double	args[6];

	if (r == 1) {
		if (s > 8.)
			return exp(-s);
		return -Nlm_Expm1(-exp(-s));
	}
	if (r < 1)
		return 0.;

	xr = r;

	if (r < 8) {
		if (s <= -2.3*xr)
			return 1.;
	}
	else if (r < 15) {
			if (s <= -2.5*xr)
				return 1.;
	}
	else if (r < 27) {
			if (s <= -3.0*xr)
				return 1.;
	}
	else if (r < 51) {
			if (s <= -3.4*xr)
				return 1.;
	}
	else if (r < 101) {
			if (s <= -4.0*xr)
				return 1.;
	}

	/* stddev in the limit of infinite r, but quite good for even small r */
	stddev = sqrt(xr);
	stddev4 = 4.*stddev;
	xr1 = r1 = r - 1;

	if (r > 100) {
		/* Calculate lower bound on the mean using inequality log(r) <= r */
		est_mean = -xr * xr1;
		if (s <= est_mean - stddev4)
			return 1.;
	}

	/* mean is rather close to the mode, and the mean is readily calculated */
	/* mean in the limit of infinite r, but quite good for even small r */
	logr = log(xr);
	mean = xr * (1. - logr) - 0.5;
	if (s <= mean - stddev4)
		return 1.;

	if (s >= mean) {
		t = s + 6.*stddev;
		itmin = 1;
	}
	else {
		t = mean + 6.*stddev;
		itmin = 2;
	}

#define ARG_R args[0]
#define ARG_R2 args[1]
#define ARG_ADJ1 args[2]
#define ARG_ADJ2 args[3]
#define ARG_SDIVR args[4]
#define ARG_EPS args[5]

	ARG_R = xr;
	ARG_R2 = xr2 = r - 2;
	ARG_ADJ1 = xr2*logr - Nlm_LnGammaInt(r1) - Nlm_LnGammaInt(r);
	ARG_EPS = blast_config->sump_epsilon;

	do {
		d = Nlm_RombergIntegrate(g, args, s, t, blast_config->sump_epsilon, 0, itmin);
#ifdef HUGE_VAL
		if (d == HUGE_VAL)
			return d;
#endif
	} while (s < mean && d < 0.4 && itmin++ < 4);

	return (d < 1. ? d : 1.);
}

static double
g(s, vp)
	register double	s;
	Nlm_VoidPtr	vp;
{
	register double PNTR	args = vp;
	double	mx;
	
	ARG_ADJ2 = ARG_ADJ1 - s;
	ARG_SDIVR = s / ARG_R;	/* = s / r */
	mx = (s > 0. ? ARG_SDIVR + 3. : 3.);
	return Nlm_RombergIntegrate(f, vp, 0., mx, ARG_EPS, 0, 1);
}

static double
f(x, vp)
	register double	x;
	Nlm_VoidPtr	vp;
{
	register double PNTR	args = vp;
	register double	y;

	y = exp(x - ARG_SDIVR);
#ifdef HUGE_VAL
	if (y == HUGE_VAL)
		return 0.;
#endif
	if (ARG_R2 == 0.)
		return exp(ARG_ADJ2 - y);
	if (x == 0.)
		return 0.;
	return exp(ARG_R2*log(x) + ARG_ADJ2 - y);
}


/* BlastLogSumPCalc -- return the natural log(BlastSumPCalc()) */
double
BlastLogSumPCalc(r, s)
	int	r;
	double	s;
{
	int		r1, itmin;
	double	t, d;
	double	est_mean, mean, stddev, stddev4;
	double	xr, xr1, xr2, logr;
	double	args[6];

	if (r == 1) {
		if (s > 4.) /* < 0.3% error when s = 4 */
			return -s;
		if (s < -3.) {
			if (s < -8.)
				return 0.;
			/* limit of log(1+x) as x approaches 0 = x */
			return -exp(-exp(-s));
		}
		return log(-Nlm_Expm1(-exp(-s)));
	}
	if (r < 1)
#ifdef HUGE_VAL
		return -HUGE_VAL;
#else
		return -1.e30;
#endif

	xr = r;

	if (r < 8) {
		if (s <= -2.3*xr)
			return 0.;
	}
	else if (r < 15) {
			if (s <= -2.5*xr)
				return 0.;
	}
	else if (r < 27) {
			if (s <= -3.0*xr)
				return 0.;
	}
	else if (r < 51) {
			if (s <= -3.4*xr)
				return 0.;
	}
	else if (r < 101) {
			if (s <= -4.0*xr)
				return 0.;
	}

	/* stddev in the limit of infinite r, but quite good for even small r */
	stddev = sqrt(xr);
	stddev4 = 4.*stddev;
	xr1 = r1 = r - 1;

	if (r > 100) {
		/* Calculate lower bound on the mean using inequality log(r) <= r */
		est_mean = -xr * xr1;
		if (s <= est_mean - stddev4)
			return 0.;
	}

	/* mean is rather close to the mode, and the mean is readily calculated */
	/* mean in the limit of infinite r, but quite good for even small r */
	logr = log(xr);
	mean = xr * (1. - logr) - 0.5;
	if (s <= mean - stddev4)
		return 0.;

	if (s >= mean) {
		t = s + 6.*stddev;
		itmin = 1;
	}
	else {
		t = mean + 6.*stddev;
		itmin = 2;
	}

#define ARG_R args[0]
#define ARG_R2 args[1]
#define ARG_ADJ1 args[2]
#define ARG_ADJ2 args[3]
#define ARG_SDIVR args[4]
#define ARG_EPS args[5]

	ARG_R = xr;
	ARG_R2 = xr2 = r - 2;
	ARG_ADJ1 = s;
	ARG_EPS = blast_config->sump_epsilon;

	d = Nlm_RombergIntegrate(g, args, s, t, blast_config->sump_epsilon, 0, itmin);
#ifdef HUGE_VAL
	if (d == HUGE_VAL)
		return d;
#endif
	return log(d) - s + xr2*logr - Nlm_LnGammaInt(r1) - Nlm_LnGammaInt(r);
}

/*
	BlastCutoffs

	Calculate the cutoff score, S, and the highest expected score.

	WRG
*/
BLAST_Error
BlastCutoffs(BLAST_ScorePtr S, /* cutoff score */
	double PNTR E, /* expected no. of HSPs scoring at or above S */
	BLAST_KarlinBlkPtr kbp,
	double qlen, /* length of query sequence */
	double dblen, /* length of database or database sequence */
	Nlm_Boolean dodecay) /* TRUE ==> use gapdecay feature */
{
	register BLAST_Score	s = *S, es;
	register double	e = *E;
	Boolean		s_changed = FALSE;
	double	esave;

	if (kbp->Lambda == -1. || kbp->K == -1. || kbp->H == -1.)
		return blast_errno = BLAST_ERR_INVAL;

	/*
	Calculate a cutoff score, S, from the Expected
	(or desired) number of reported HSPs, E.
	*/
	es = 1;
	esave = e;
	if (e > 0.) {
		if (dodecay)
			e = BlastGapDecayInverse(e, 1, blast_config->gapdecayrate);
		es = BlastKarlinEtoS(e, kbp, qlen, dblen);
	}
	/*
	Pick the larger cutoff score between the user's choice
	and that calculated from the value of E.
	*/
	if (es > s) {
		s_changed = TRUE;
		*S = s = es;
	}

	/*
		Re-calculate E from the cutoff score, if E going in was too high
	*/
	if (esave <= 0. || !s_changed) {
		e = BlastKarlinStoE(s, kbp, dblen, qlen);
		if (dodecay)
			e = BlastGapDecay(e, 1, blast_config->gapdecayrate);
		*E = e;
	}

	return BLAST_ERR_NONE;
}


/*
BlastNWSThreshold

Returns a default neighborhood wordscore threshold to use for sensitive
sequence comparisons.  Default thresholds are only available for
particular wordsizes.  These defaults were empirically determined
to achieve good/high sensitivity, at the expense of a larger neighborhood,
increased storage requirements, and slower speed.

Returns -1 on error.
*/
BLAST_Score
BlastNWSThreshold(kbp, W, sens)
	BLAST_KarlinBlkPtr	kbp;
	int	W;
	enum blast_sensitivity	sens;
{
	BLAST_Score	T;
	double	Lambda, H;

	if (kbp == NULL)
		return -1;
	Lambda = kbp->Lambda;
	H = kbp->H;
	if (Lambda == -1. || H == -1.) {
		blast_errno = BLAST_ERR_INVAL;
		return -1;
	}

	if (sens >= BLAST_SENSITIVITY_HIGH)
	switch (W) {
	case 3:
		T = ceil((4.676 + 1.3*log((double)H))/Lambda - 0.5);
		break;
	case 4:
		T = ceil((double)(2.1 + 3.751*H)/Lambda - 0.5);
		break;
	default:
		blast_errno = BLAST_ERR_WORDSIZE;
		return -1;
	}

	if (sens == BLAST_SENSITIVITY_MEDIUM)
	switch (W) {
	case 3:
		T = ceil((5.076 + 1.3*log(kbp->H))/kbp->Lambda - 0.5);
		break;
	case 4:
		T = ceil((2.8 + 3.751*kbp->H)/kbp->Lambda - 0.5);
		break;
	default:
		blast_errno = BLAST_ERR_WORDSIZE;
		return -1;
	}

	if (sens <= BLAST_SENSITIVITY_LOW)
	switch (W) {
	case 3:
		T = ceil((5.376 + 1.3*log(kbp->H))/kbp->Lambda - 0.5);
		break;
	case 4:
		T = ceil((3.1 + 2.751*kbp->H)/kbp->Lambda - 0.5);
		break;
	default:
		blast_errno = BLAST_ERR_WORDSIZE;
		return -1;
	}

	T = MAX(T, 1);
	return T;
}

/*
	BlastKarlinBtoS

	Converts bits of information into an integral score.

	On error, returns -1.
*/
BLAST_Score
BlastKarlinBtoS(bits, kbp)
	double	bits;
	BLAST_KarlinBlkPtr	kbp;
{
	if (kbp->Lambda == -1. || bits < 0.)
		return -1;

	return (BLAST_Score) ceil((double)(bits/(kbp->Lambda/NCBIMATH_LN2)));
}

/*
	BlastExpHighScore()

	Return the expected high score.
	On error, return BLAST_SCORE_MAX;

*/

BLAST_Score
BlastExpHighScore(kbp, dblen, qlen)
	BLAST_KarlinBlkPtr	kbp;
	double	dblen;	/* length of database */
	double	qlen;	/* length of query sequence */
{
	double	Lambda, K, H; /* parameters for Karlin statistics */
	BLAST_Score	highs, s;
	double	elen, f, g;

	Lambda = kbp->Lambda;
	K = kbp->K;
	H = kbp->H;
	if (Lambda == -1. || K == -1. || H == -1.)
		return BLAST_SCORE_MAX;

	/* Calculate the expected high score */
	highs = log((double)K * qlen * dblen)/Lambda;
	if (blast_config->refined_stats)
		do {
			elen = BlastKarlinStoLen(kbp, highs);
			f = MAX(qlen - elen, 1.);
			g = MAX(dblen - elen, 1.);
			s = log((double)(K * f * g))/Lambda;
		} while (s < highs && (highs = s) > 0);

	return highs;
}

/*
	BlastKarlinStoP
	
	Calculate the probability (as opposed to expectation)
	of achieving a particular score.

	On error, return value is -1. (same as BlastKarlinStoE()).
*/
double
BlastKarlinStoP(S, kbp, dblen, qlen)
	BLAST_Score	S;
	BLAST_KarlinBlkPtr	kbp;
	double	dblen;	/* length of database */
	double	qlen;	/* length of query sequence */
{
	double	x, p;

	x = BlastKarlinStoE(S, kbp, qlen, dblen);
	if (x == -1.)
		return x;
	p = BlastKarlinEtoP(x);

	return p;
}

BLAST_KarlinBlkPtr
BlastKarlinBlkNew()
{
	BLAST_KarlinBlkPtr	kbp;

	kbp = (BLAST_KarlinBlkPtr) BlastCalloc(sizeof(BLAST_KarlinBlk));
	if (kbp == NULL)
		return NULL;
	kbp->Lambda = -1.0;
	kbp->K = -1.0;
	kbp->H = -1.0;
	kbp->gap0 = kbp->gap1 = BLAST_SCORE_MAX; /* ...for ungapped alignments */
	return kbp;
}

void
BlastKarlinBlkDestruct(kbp)
	BLAST_KarlinBlkPtr	kbp;
{
	kbp->Lambda = -1.0;
	kbp->K = -1.0;
	kbp->H = -1.0;
	BlastFree((CharPtr)kbp);
	return;
}

Nlm_Boolean
BlastRefinedStats(Nlm_Boolean flag)
{
	Nlm_Boolean	oflag;

	oflag = blast_config->refined_stats;
	blast_config->refined_stats = flag;
	return oflag;
}
