#include <ncbi.h>
#include "blastapp.h"
#include "aabet.h"


#ifndef WORDSIZE_MAX
#define WORDSIZE_MAX	12
#endif
/* ----------   build table of amino acid neighborhood words  -----------*/

static void	enumerate PROTO((int i, Score_t score));
static void	addpat PROTO((CharPtr query, unsigned char *wword));

static DFAPtr	dp;

static unsigned char wword[WORDSIZE_MAX];	/* candidate for critical word */

static Score_t
	best[WORDSIZE_MAX];	/* best[i] = smallest score of wword[0]..wword[i]
				   that can be extended to a critical word */
static char	*q;			/* pointer into query sequence */
static char	*q_seq;
static unsigned long	nwords, nexact;
static int		f;
static int		alpha;
static size_t	W;
static Score_t	T;
static Score_t	(*matrx)[ALPHAVAL_MAX+1][ALPHAVAL_MAX+1];
static char	(*ord)[ALPHAVAL_MAX+1][ALPHASIZE_MAX];
static int	wm1;
static char	*funcname = "bld_table_aa";

DFAPtr
bld_table_aa(theDFA, rframe, qseq, qlen, wordsize, threshold, alphasize,
						matrix, maxcost, order, numwords, numexact)
	DFAPtr	theDFA;
	int		rframe;
	char	*qseq;
	size_t	qlen;
	int		wordsize;
	Score_t	threshold;
	int		alphasize;	/* size of the alphabet */
			/* max. cost of substituting for a character */
			/* matrix[i][j] == the cost of replacing i by j */
	Score_t (*matrix)[ALPHAVAL_MAX+1][ALPHAVAL_MAX+1],
			(*maxcost)[ALPHAVAL_MAX+1];
			/* order[i][j] == the j-th best character to substitute for i */
	char	(*order)[ALPHAVAL_MAX+1][ALPHASIZE_MAX];
	unsigned long	*numwords, *numexact;
{
	register int	i;
	register Score_t	score;
	char	*q_max;

	if (wordsize > WORDSIZE_MAX)
		bfatal(ERR_WORDSIZE, "%s:  Word size is greater than WORDSIZE_MAX (%d)",
			funcname, WORDSIZE_MAX);
	f = rframe;
	W = wordsize;
	T = threshold;
	wm1 = W - 1;
	nwords = nexact = 0;
	alpha = alphasize;
	matrx = matrix;
	ord = order;
	q_seq = qseq;

	dp = theDFA;

	if (dp == NULL) {
		dfa_memalloc(ckalloc);
		dp = dfa_new(0, AAID_MAX);
		if (dp == NULL)
			bfatal(ERR_DFALIB, "%s: dfa_new: %s", funcname, dfa_errstr(dfaerrno));
		dfa_halton(dp, '\0');
		dfa_begin(dp);
	}

	for (score=0, i=0; i<wm1; ++i)
		score += (*matrx)[q_seq[i]][q_seq[i]];

	for (q = q_seq, q_max = q_seq+qlen-wm1; q < q_max; ++q) {
		best[wm1] = T;
		for (i = wm1; i > 0; --i)
			best[i-1] = best[i] - (*maxcost)[q[i]];
		enumerate(0, 0);

		/* Check the score of a perfect match */
		score += (*matrx)[q[wm1]][q[wm1]];
		if (score < T && score > 0) {
			/*
			Hit on a perfect match, even if its score is less than T.
			This was not done by the program when the JMB paper
			was written, but was a suggested enhancement which should
			yield better (lower) figures for the number of MSPs
			that are overlooked on average.
			*/
			++nexact;
			addpat(q, (unsigned char *)q);
		}
		score -= (*matrx)[q[0]][q[0]];
	}
	if (nwords + nexact == 0)
		bfatal(ERR_NOWORDS, "%s: no satisfying words in query sequence to compare against database", funcname);

	if (numwords != NULL)
		*numwords += nwords;
	if (numexact != NULL)
		*numexact += nexact;

	return dp;
}

/* enumerate - try all letters in position i of candidate for a critical word;
	score is for wword[0]..wword[i-1] */
static void
enumerate(i, score)
	register int		i;
	register Score_t	score;
{
	/* possible letters by decreasing weight */
	register char	*o = (*ord)[q[i]];
	register int	c;
	register Score_t	new_score;

	for (c = 0; c < alpha; ++c) {
		new_score = score + (*matrx)[q[i]][wword[i] = o[c]];
		if (new_score < best[i])
			break;
		if (i < wm1)
			enumerate(i+1, new_score);
		else {
			if (new_score <= 0)
				break;
			++nwords;
			addpat(q, wword);
		}
	}
}

static void
addpat(q, wword)
	CharPtr	q;
	unsigned char	*wword;
{
	register DFA_PatID	pi;

	if (f == INT_MIN)
		pi.p = q+W;
	else
		pi.i = ((q-q_seq+W)<<3)+f;

	if (dfa_add(dp, wword, W, pi) == NULL)
		bfatal(ERR_DFALIB, "dfa_add: %s", dfa_errstr(dfaerrno));
}


/* end_table -- finish up the building of an automaton */
void
end_table(dp)
	DFAPtr	dp;
{
	if (dfa_close(dp) != 0)
		bfatal(ERR_DFALIB, "dfa_close: %s", funcname, dfa_errstr(dfaerrno));
}
