/*
 *	dfa.h
 *
 *	Deterministic Finite-state Automata header
 *
 *	Warren Gish
 *	November 1989
 *
 *	(For a better look, set tab stops every 4 characters).
 */
#ifndef __DFA__
#define __DFA__
#include <gish.h>

#ifndef __DFAEXTERN
#define __DFAEXTERN extern
#endif

#define DFA_MEMINCR_DEFAULT	(64*KBYTE)


/*
 *	Error return codes and their associated meanings
 */
typedef enum {
		dfaErrNone = 0,
		dfaErrUnknown,		/* Nonspecific error */
		dfaErrBadPtr,		/* Bad pointer to DFA structure */
		dfaErrIncompat,		/* Incompatible dfa_add function was called */
		dfaErrBadParm,		/* Bad parameter value */
		dfaErrMemincrTooSml,/* memincr too small to satisfy minimal requests */
		dfaErrNoMem,		/* Insufficient memory available for request */
		dfaErrPatlen,		/* Zero length pattern */
		dfaErrOpstate,		/* DFA is in the wrong operational state */
		dfaErrNonAlpha,		/* Non-alphabetic letter encountered */
		dfaErrNullParm,		/* NULL was passed for a required parameter */
		dfaErrAlphaSize,	/* Bad alphabet size */
		dfaErrAlphaRange,	/* Bad range (amax - amin) for alphabet */
		dfaErrAlphaDomain,	/* Bad domain (amin...amax) for alphabet */
		dfaErrFileIO,		/* Stream I/O error (check global variable errno) */
		dfaErrNotFound,		/* Pattern not found */
		dfaErrReport,		/* report returned non-zero to dfa_scanf routines */
		dfaErrMax
	} DFA_Error, PNTR DFA_ErrorPtr;

__DFAEXTERN DFA_Error	dfaerrno;


/*
 * Internal operational states (different from DFA_States) of a DFA structure
 */
typedef enum {
		dfaOpstateNone = 0,
		dfaOpstateInit,		/* basic initialization done */
		dfaOpstateMemInit,	/* dfa memory management has been initialized */
		dfaOpstateAlphaInit,	/* alphabet has been validated and processed */
		dfaOpstateVirgin,	/* DFA is ready for adding its first pattern */
		dfaOpstateOpen,		/* DFA is ready for adding another pattern */
		dfaOpstateBuilding,	/* a pattern is being added to the DFA */
		dfaOpstateClosing,	/* DFA is in the process of being closed */
		dfaOpstateClosed,	/* DFA is closed */
		dfaOpstateZombie,	/* DFA is garbage... and yet it lives! */
		dfaOpstateDestroyed,	/* DFA has been destroyed */
		dfaOpstateMax
	} DFA_Opstate, PNTR DFA_OpstatePtr;


typedef struct _dfa_state {
		struct _dfa_state PNTR next[1/*...*/]; /* length depends on alphabet */
	} DFA_State, PNTR DFA_StatePtr;


typedef union { /* User may wish to identify a pattern by numerous means */
		Nlm_VoidPtr p;
		long i;
		unsigned long u;
	} DFA_PatID, PNTR DFA_PatIDPtr;

			/* Linked list of patIDs */
typedef struct _dfa_patlist {
			struct _dfa_patlist	PNTR chain;
			DFA_PatID	patID;
		} DFA_Patlist, PNTR DFA_PatlistPtr;

			/* Structure of an accepting transition */
typedef struct _dfa_accept {
			DFA_StatePtr	retState; /* Continuing (return) state pointer */
			DFA_Patlist	pl;	/* Linked list of accepted patIDs */
		} DFA_Accept, PNTR DFA_AcceptPtr;


typedef struct _DFA_Buf {
		struct _DFA_Buf	PNTR chain;	/* next allocated buffer (or NULL) */
		size_t	bsize;		/* Total extent (in bytes) of this buf element */
		size_t	bused;		/* No. of bytes currently in use in this buf */
		Nlm_VoidPtr		buf[1];
	} DFA_Buf, PNTR DFA_BufPtr;

typedef struct {
	long		magic;	/* magic number indicates this really is a DFA */
#define DFA_MAGIC	0x7291d255
	DFA_Opstate	opstate;	/* current operational state of DFA */
	ValNode		user;		/* for the user */
	int			amin, amax;	/* min. & max. letter values */
	size_t		asize;		/* no. of letters in the alphabet */
	CharPtr		ctl;		/* to keep track of halt & ignore letters */
	DFA_StatePtr	state0;	/* pointer to initial DFA_State of the DFA */
	size_t		nstates;	/* no. of DFA_States in the machine */
	size_t		naccepts;	/* no. of accepting transitions in the machine */
	size_t		statesize;	/* no. of bytes in a single DFA_State */
	size_t		accsize;	/* size of accepting transition block */
	size_t		patsize;	/* size of patlist block */
	size_t		idsize;		/* sizeof patID block, word-aligned */
	size_t		xlen;		/* length of extended patID block, unaligned */
	size_t		nignore;
	int PNTR	ignore;
	size_t		nhalton;
	int PNTR	halton;
	DFA_BufPtr	lastbuf;	/* pointer to last dfa_buf allocated */
	CharPtr		freeptr;	/* start of free storage (word-aligned) */
	size_t		memavail;	/* amount of memory available in current buffer */
	DFA_Buf		buf0;
	} DFA, PNTR DFAPtr;

/**********************
*
*	The DFA functions
*
***********************/

/* dfa_new -- create a skeletal DFA devoid of patterns */
DFAPtr dfa_new PROTO((int amin, int amax));

/* dfa_memest -- obtain an estimate of the minimum value required for memincr */
size_t	dfa_memest PROTO((int amin, int amax));

DFA_Error dfa_ignore PROTO((DFAPtr dp, int a));
DFA_Error dfa_restore PROTO((DFAPtr dp, int a));
DFA_Error dfa_halton PROTO((DFAPtr dp, int a));
size_t dfa_xlen PROTO((DFAPtr dp, size_t xlen));

DFA_Error dfa_begin PROTO((DFAPtr));

Nlm_VoidPtr	(_cdecl *dfa_memalloc PROTO((Nlm_VoidPtr (_cdecl *)()))) ();
size_t	dfa_memincr PROTO((size_t memincr));
void	(_cdecl *dfa_memfree PROTO((void (_cdecl *)()))) ();


/* dfa_add -- add a pattern to the acceptance list of a skeletal DFA */
DFA_PatlistPtr dfa_add PROTO((DFAPtr dp, unsigned char PNTR pattern, size_t patlen, DFA_PatID patID));

/* dfa_iadd -- add an int pattern to the acceptance list of a skeletal DFA */
DFA_PatlistPtr dfa_iadd PROTO((DFAPtr dp, int PNTR pattern, size_t patlen, DFA_PatID patID));

DFA_PatlistPtr dfa_addx PROTO((DFAPtr dp, unsigned char PNTR pattern, size_t patlen, Nlm_VoidPtr idptr));

DFA_PatlistPtr dfa_iaddx PROTO((DFAPtr dp, int PNTR pattern, size_t patlen, Nlm_VoidPtr idptr));

/* dfa_close -- finalize the structure of a skeletal DFA (no more patterns) */
DFA_Error dfa_close PROTO((DFAPtr));

/* dfa_destruct -- release all memory allocated to a DFA */
DFA_Error dfa_destruct PROTO((DFAPtr));

/* dfa_check -- examine a DFA for validity */
DFA_Error dfa_check PROTO((DFAPtr));

/* dfa_opstate -- return the operational state of the DFA */
DFA_Opstate dfa_opstate PROTO((DFAPtr));

/* dfa_error -- print on stderr a description of the last DFA error */
DFA_Error dfa_perror PROTO((CharPtr s));

/* dfa_errstr -- return pointer to a description of the specified DFA error */
char *dfa_errstr PROTO((DFA_Error));

/* dfa_extent -- obtain the full extent of storage allocated to the DFA */
unsigned long dfa_extent PROTO((DFAPtr));

/* dfa_size -- obtain the amount of storage actually needed by the DFA */
unsigned long dfa_size PROTO((DFAPtr));

/* dfa_wabsize -- obtain the no. of bytes required for the wrap-around buffer */
long dfa_wabsize PROTO((DFAPtr));

/* dfa_dump -- dump contents of a DFA into a file for debugging */
DFA_Error dfa_dump PROTO((DFAPtr dp, FILE *fp));

/* dfa_accepts -- obtain the output list for a particular pattern */
DFA_PatlistPtr dfa_accepts PROTO((DFAPtr dp, unsigned char PNTR pattern, size_t patlen));

/* dfa_contains -- whether DFA contains the given pattern/patID combination */
DFA_PatlistPtr dfa_contains PROTO((DFAPtr dp, unsigned char PNTR pattern, size_t patlen, DFA_PatID patID));


/*****************************************************
*
*  Example routines for scanning input with a DFA...
*
******************************************************/

/* dfa_nextstate -- transit to the next state from the current state */
DFA_StatePtr dfa_nextstate PROTO((DFA_StatePtr S, int ch, size_t pos,
		int (_cdecl *report) (size_t pos, DFA_PatID patID) ));

DFA_Error dfa_scanf PROTO((DFAPtr dp,
		int (_cdecl *report) (size_t off, DFA_PatID patID) ));

/* dfa_pscanf -- input from function like fgetc which returns EOF when done */
DFA_Error dfa_pscanf PROTO((DFAPtr dp,
		int (_cdecl *gettoken) (Nlm_VoidPtr stream), Nlm_VoidPtr stream,
		int (_cdecl *report) (size_t off, DFA_PatID patID) ));

/* dfa_fscanf -- input from a file */
DFA_Error dfa_fscanf PROTO((DFAPtr dp, FILE *fp,
		int (_cdecl *report) (size_t off, DFA_PatID patID) ));

/* dfa_sscanf -- input from a null-terminated character string */
DFA_Error dfa_sscanf PROTO((DFAPtr dp, CharPtr query, size_t querylen,
		int (_cdecl *report) (size_t off, DFA_PatID patID) ));



/**********************************************************************
The following macros are used to detect the encounters of accepting
state transitions, which are recognized as being unaligned pointers;
and to convert state pointers into invalid, misaligned values through
arithmetic or logical operations.
***********************************************************************/
#define DFA_MUNGE_BIT	MUNGE_BIT
#define DFA_ISACCEPTING(state)	ISMUNGED(state)
#define DFA_ISMUNGED(state)	ISMUNGED(state)
#define DFA_MUNGED(addr)	MUNGED(addr)
#define DFA_UNMUNGED(addr)	UNMUNGED(addr)

#endif /* !__DFA__ */

