#include <ncbi.h>
#include <gishlib.h>
#include "blastapp.h"

/* -----------------   read file of substitution weights   ----------------*/

/* see weights.c */

int
get_weightx(mname, gcode, bits, lettermax, xltab, asize, minchar, maxchar,
			achars, order, lows, highs, maxcost, matrx)
	char	*mname;	/* Name of file containing the substitution matrix */
	char	*gcode; /* genetic code description */
	double	*bits;
	int		lettermax;	/* Max binary letter value used to screen input file */
	int		*xltab;	/* ASCII to binary translation table */
	int		*asize;	/* No. of letters in the alphabet */
	int		*minchar, *maxchar;	/* Min. and max. letter values */
	signed char
		/* array of amino acid names, plus null terminator */
		(*achars)[ALPHASIZE_MAX+1],
		/* order[i][j] = j-th best char to sub. for i */
		(*order)[ALPHAVAL_MAX+1][ALPHASIZE_MAX];

	Score_t
		*lows,			/* lowest substitution value */
		*highs,			/* highest substitution value */
		/* max cost of substituting for a character */
		(*maxcost)[ALPHAVAL_MAX+1],
		/* matrx[i][j] = cost of replacing i by j */
		(*matrx)[ALPHAVAL_MAX+1][ALPHAVAL_MAX+1];
{
	char buf[FILENAME_MAX], *cp;
	FILE	*fp;
	int		i, j, lineno = 1, thechar, ch, chcnt, chno;
	double	xscore;
	Score_t	s;
	Score_t	*m;
	signed char	*o, *p, *q, t;
	char	*gcp;
	char	once = 0;

Retry:
	strcpy(buf, mname);
	strcat(buf, ".cdi");
	if ((fp = ckopen(buf, "r", 0)) == NULL) {
		if (++once > 1)
			bfatal(ERR_FOPEN, "Could not find or open a scoring matrix file named:  %s", mname);
		cp = getenv("BLASTMAT");
		sprintf(buf, "%s%s%s%s%s",
				(cp == NULL ? BLASTMAT : cp),
				DIRDELIMSTR,
				"aa",
				DIRDELIMSTR,
				mname);
		mname = buf;
		goto Retry;
	}

	/* Ignore any initial comment line(s) */
	while ((i = fgetc(fp)) == '#') {
		char	buf[1024];
		++lineno;
		if (fgets(buf, sizeof buf, fp) == NULL)
			break;
	}
	ungetc(i, fp);

	/* read residue names */
	*minchar = INT_MAX;
	*maxchar = INT_MIN;
	p = (*achars);
	while ((i = fgetc(fp)) != '\n' && i != 0) {
		if ((j = xltab[i]) < lettermax) {
			if (p - (*achars) >= DIM(*achars)-1)
				bfatal(ERR_SUBFILE, "Too many amino acid names in %s.", mname);
			if (j > *maxchar)
				*maxchar = j;
			if (j < *minchar)
				*minchar = j;
			*p++ = j;
		}
		else {
			if (isspace(i))
				continue;
			if (isprint(i))
				bfatal(ERR_SUBFILE, "%s:  Bad a.a. residue: ``%c.''", mname, i);
			else
				bfatal(ERR_SUBFILE, "%s:  Bad a.a. residue: #%d.", mname, i);
		}
	}

	*asize = p - (*achars);
	if (*asize >= DIM(*matrx))
		bfatal(ERR_SUBFILE, "too many amino acid names in %s.", mname);

	/* For illegal characters, set weight to lowest possible value. */
	for (i=0; i<DIM(*maxcost); ++i)
		(*maxcost)[i] = SCORE_MIN/2;
	for (i=0; i<DIM(*matrx); ++i)
		for (j=0; j<DIM((*matrx)[0]); ++j)
			(*matrx)[i][j] = SCORE_MIN/2;

	/* read substitution weights for each residue */
	*lows = SCORE_MAX;
	*highs = SCORE_MIN;
	for (chcnt = 0; chcnt < *asize; ++lineno) {
		ch = fgetc(fp);
		if (ch == '\n')
			continue;
		if (ch == EOF || isspace(ch))
			bfatal(ERR_SUBFILE, "Missing letter in scoring matrix file ``%s'', line %d.", mname, lineno);
		++chcnt;
		thechar = xltab[ch];
		for (chno = 0; chno < *asize; ++chno)
			if (thechar == (*achars)[chno])
				break;
		if (chno >= *asize)
			bfatal(ERR_SUBFILE, "Unexpected letter in scoring matrix file ``%s'', line %d.", mname, lineno);
		for (j = 0; j < *asize; ++j) {
			if (fscanf(fp, "%lg", &xscore) != 1)
				bfatal(ERR_SUBFILE, "Conversion error reading matrix file ``%s'', on line %d.",
							mname, lineno);
			gcp = gcode;
			while ((gcp = strchr(gcp, thechar)) != NULL) {
				s = (*matrx)[gcp-gcode+1][(*achars)[j]] =
						fct_nint(xscore + (bits != NULL ? bits[gcp-gcode] : 0));
				if (*lows > s)
					*lows = s;
				if (*highs < s)
					*highs = s;
				if (++gcp - gcode > 64)
					break;
			}
		}
		while ((ch = fgetc(fp)) != '\n' && ch != EOF)
			;
	}
	for (i = 1; i < 65; ++i) {
		o = (*order)[i];
		for (j = 0; j < *asize; ++j)
			o[j] = (*achars)[j];
		/* bubble-sort residue names by cost of substitution for i-th residue */
		for (q = o + (*asize-1); q > o; --q)
			for (p = o; p < q; ++p)
				if ((*matrx)[i][*p] < (*matrx)[i][*q]) {
					t = *p;
					*p = *q;
					*q = t;
				}
		(*maxcost)[i] = (*matrx)[i][*o];
/*		printf("[%d,%d] = %ld\n", (int)i, (int)*o, (long) (*matrx)[i][*o]); */
	}
	(void) fclose(fp);

	return 0;
}
