/*
Copyright (C) 1997 by Warren R. Gish.  All Rights Reserved.
*/
#include <ncbi.h>
#include <gishlib.h>
#include "nrdb.h"

/* File-local helpers; their definitions follow NRFile_Read() below. */
static void	compress2bits PROTO((SeqStrPtr));
static void	compress4bits PROTO((SeqStrPtr));
static void hashsave PROTO((SeqStrPtr sp, unsigned long hashval));

/*
 * NRFile_Open -- open a sequence file for reading and wrap it in a newly
 * allocated NRFile record.
 *
 *	filename	path handed to openfile(); a copy is kept in the record
 *	dbname		database name; a copy and its length are kept in the record
 *
 * Returns the new record, or NULL when the file cannot be opened or the
 * record cannot be allocated (the stream is closed again in that case).
 * Every successful call is assigned the next value of a function-static
 * file counter, so the first file opened gets filenum 1.
 */
NRFilePtr
NRFile_Open(filename, dbname)
	char	*filename;
	char	*dbname;
{
	static int	filenum = 0;
	NRFilePtr	nrfp;
	FILE	*fp;

	fp = openfile(filename, "r");
	if (fp == NULL)
		return NULL;

	/* Request a 256 KB fully-buffered stream; the result is not checked. */
	(void) setvbuf(fp, NULL, _IOFBF, 256*1024);

	nrfp = (NRFilePtr)mem_calloc(sizeof(*nrfp), 1);
	if (nrfp == NULL) {
		/* Do not leak the stream when the record cannot be allocated. */
		(void) fclose(fp);
		return NULL;
	}

	nrfp->fp = fp;
	nrfp->filename = str_dup(filename);
	nrfp->dbname = str_dup(dbname);
	nrfp->dbnamelen = strlen(dbname);

	/* Per-file statistics start from zero. */
	nrfp->numseqs = 0;
	nrfp->nummatches = 0;
	nrfp->totres = 0;
	nrfp->dupres = 0;
	nrfp->numnull = 0;

	nrfp->filenum = ++filenum;
	return nrfp;
}


/*
 * NRFile_Close -- close the stream held by nrfp and mark the record closed.
 *
 * Returns the fclose() status, or 0 when the record holds no open stream
 * (fp is already NULL), so closing twice is harmless instead of handing a
 * null stream to fclose().
 */
int
NRFile_Close(nrfp)
	NRFilePtr	nrfp;
{
	int		rc;

	if (nrfp->fp == NULL)
		return 0;

	rc = fclose(nrfp->fp);
	nrfp->fp = NULL;
	return rc;
}


/*
 * NRFile_Destruct -- close the stream if it is still open, blank the stored
 * names and release the NRFile record itself.
 *
 * Safe to call after NRFile_Close(): the stream is closed only when fp is
 * non-NULL.  A NULL nrfp is ignored.
 *
 * NOTE(review): the filename/dbname copies made with str_dup() are only
 * truncated to empty strings here, not released -- confirm who owns them
 * and how str_dup() memory is meant to be freed.
 */
void
NRFile_Destruct(nrfp)
	NRFilePtr	nrfp;
{
	if (nrfp == NULL)
		return;

	if (nrfp->fp != NULL) {
		(void) fclose(nrfp->fp);
		nrfp->fp = NULL;
	}

	/* Blank the names in place; skip any that were never set. */
	if (nrfp->filename != NULL)
		nrfp->filename[0] = NULLB;
	if (nrfp->dbname != NULL)
		nrfp->dbname[0] = NULLB;
	nrfp->dbnamelen = 0;

	(void) free(nrfp);
	return;
}


/*
 * NRFile_Isopen -- report whether the record still holds a stream.
 * Yields 0 once fp has been set to NULL, and 1 otherwise.
 */
Boolean
NRFile_Isopen(nrfp)
	NRFilePtr	nrfp;
{
	if (nrfp->fp == NULL)
		return 0;
	return 1;
}


/*
 * NRFile_Read -- read the next record from nrfp with getfasta(), hash its
 * residues, compress the sequence in place when its alphabet allows it, and
 * record the hash in hashray under the current value of seqcnt.
 *
 * Every residue contributes exactly once to the hash:
 *	hashval = hashval * 1103515245 + residue + 12345
 *
 * Compression is chosen from the largest nt_atob[] code seen:
 *	all codes <= 3		compress2bits()	(compression 2)
 *	all codes <= 15		compress4bits()	(compression 4)
 *	otherwise		left as read	(compression 0)
 * An empty sequence is left with compression 0 and clen 0.
 *
 * Returns a pointer to function-static storage that the next call
 * overwrites, or NULL when getfasta() returns non-zero.  seqcnt is read
 * here but not advanced.
 */
SeqStrPtr
NRFile_Read(nrfp)
	NRFilePtr	nrfp;
{
	static SeqStr	ss;
	static SeqName	ssn;
	static size_t	namemax;
	register unsigned char	*cp, *cpend;
	register unsigned char	ch;
	register unsigned long	hashval = 0;
	int	bits = 2;	/* narrowest packing still possible: 2, 4 or 0 (none) */

	ss.chain = NULL;
	ss.clen = 0;
	ss.compression = 0;	/* do not carry over the previous record's setting */
	ss.name1 = &ssn;
	ssn.chain = NULL;
	ssn.nrfp = nrfp;

	if (getfasta(xltab, &ssn.name, &namemax, &ssn.namelen, &seqbuf, &seqbufmax, &ss.seqlen, nrfp->fp) != 0)
		return NULL;

	maxnamelen = MAX(maxnamelen, ssn.namelen);

	ss.seq = (CharPtr)seqbuf;
	if (ss.seqlen > 0) {
		/*
		 * Single pass bounded by seqlen: the sequence is not
		 * necessarily NUL-terminated, so never scan for a terminator.
		 */
		cp = (unsigned char *)ss.seq;
		cpend = cp + ss.seqlen;
		while (cp < cpend) {
			ch = *cp++;
			if (nt_atob[ch] > 15)
				bits = 0;
			else if (nt_atob[ch] > 3 && bits == 2)
				bits = 4;
			hashval *= 1103515245;
			hashval += (unsigned long)ch + 12345;
		}

		switch (bits) {
		case 2:
			compress2bits(&ss);
			break;
		case 4:
			compress4bits(&ss);
			break;
		default:
			/* at least one residue needs a full byte */
			break;
		}
	}

	ss.id = seqcnt;
	hashsave(&ss, hashval);

	nrfp->numseqs++;
	nrfp->totres += ss.seqlen;

	if (ss.seqlen > maxseqlen)
		maxseqlen = ss.seqlen;

	return &ss;
}

/*
 * compress2bits -- pack the sequence in place, four residues per byte.
 *
 * Each residue is replaced by its nt_atob[] code; the first residue of a
 * group occupies the two most significant bits.  A final partial group of
 * 1-3 residues is left-aligned in one extra byte with the unused low bits
 * zero.  The size of that partial group comes from seqlen, so no byte at
 * or beyond seq[seqlen] is read and no NUL terminator is required.
 *
 * On return clen holds the number of packed bytes and compression is 2.
 * The caller is expected to pass only residues whose codes fit in 2 bits.
 */
static void
compress2bits(ssp)
	SeqStrPtr	ssp;
{
	unsigned char	*src, *dst, *dstend;
	unsigned char	packed;
	int	rem;

	src = dst = (unsigned char *)ssp->seq;
	dstend = dst + ssp->seqlen / 4;

	/* dst never overtakes src, so packing in place is safe. */
	for (; dst < dstend; ++dst, src += 4) {
		*dst = (nt_atob[src[0]] << 6) | (nt_atob[src[1]] << 4) |
				(nt_atob[src[2]] << 2) | nt_atob[src[3]];
	}

	rem = (int)(ssp->seqlen % 4);
	if (rem > 0) {
		packed = nt_atob[src[0]] << 6;
		if (rem > 1)
			packed |= nt_atob[src[1]] << 4;
		if (rem > 2)
			packed |= nt_atob[src[2]] << 2;
		*dst++ = packed;
	}

	ssp->clen = dst - (unsigned char *)ssp->seq;
	ssp->compression = 2;
}

/*
 * compress4bits -- pack the sequence in place, two residues per byte.
 *
 * Each residue is replaced by its nt_atob[] code; the first residue of a
 * pair occupies the high nibble.  When seqlen is odd the last residue gets
 * a byte of its own with a zero low nibble.  Whether that odd residue
 * exists is decided from seqlen, so no byte at or beyond seq[seqlen] is
 * read and no NUL terminator is required.
 *
 * On return clen holds the number of packed bytes and compression is 4.
 * The caller is expected to pass only residues whose codes fit in 4 bits.
 */
static void
compress4bits(ssp)
	SeqStrPtr	ssp;
{
	unsigned char	*src, *dst, *dstend;

	src = dst = (unsigned char *)ssp->seq;
	dstend = dst + ssp->seqlen / 2;

	/* dst never overtakes src, so packing in place is safe. */
	for (; dst < dstend; ++dst, src += 2)
		*dst = (nt_atob[src[0]] << 4) | nt_atob[src[1]];

	if (ssp->seqlen % 2 != 0)
		*dst++ = nt_atob[src[0]] << 4;

	ssp->clen = dst - (unsigned char *)ssp->seq;
	ssp->compression = 4;
}

/*
 * hashsave -- record a sequence and its hash value in hashray[sp->id].
 *
 *	sp	sequence to register; the pointer itself is stored, no copy
 *		of the SeqStr is made
 *	hashval	hash of the sequence's residues
 *
 * hashray is grown in steps of 10000 entries until sp->id is a valid
 * index; fatal() is called if the reallocation fails.  The new entry's
 * nextid is set to ULONG_MAX.
 */
static void
hashsave(sp, hashval)
	SeqStrPtr	sp;
	unsigned long hashval;
{
	if (sp->id >= hashraymax) {
		/* One step may not be enough, so keep stepping until id fits. */
		do {
			hashraymax += 10000;
		} while (sp->id >= hashraymax);

		hashray = (HashRayPtr)mem_realloc(hashray, sizeof(*hashray)*hashraymax);
		if (hashray == NULL)
			fatal(1, "Out of memory for hashray");
	}

	hashray[sp->id].sp = sp;
	hashray[sp->id].hashval = hashval;
	hashray[sp->id].nextid = ULONG_MAX;
	return;
}

