/* Copyright (C) 1997 by Warren R. Gish. All Rights Reserved. */ #include #include #include "nrdb.h" static void compress2bits PROTO((SeqStrPtr)); static void compress4bits PROTO((SeqStrPtr)); static void hashsave PROTO((SeqStrPtr sp, unsigned long hashval)); NRFilePtr NRFile_Open(filename, dbname) char *filename; char *dbname; { register NRFilePtr nrfp; register FILE *fp; static int filenum = 0; fp = openfile(filename, "r"); if (fp != NULL) setvbuf(fp, NULL, _IOFBF, 256*1024); else return NULL; nrfp = (NRFilePtr)mem_calloc(sizeof(*nrfp), 1); nrfp->fp = fp; nrfp->filename = str_dup(filename); nrfp->dbname = str_dup(dbname); nrfp->dbnamelen = strlen(dbname); nrfp->numseqs = 0; nrfp->nummatches = 0; nrfp->totres = 0; nrfp->dupres = 0; nrfp->numnull = 0; nrfp->filenum = ++filenum; return nrfp; } int NRFile_Close(nrfp) NRFilePtr nrfp; { int rc; rc = fclose(nrfp->fp); nrfp->fp = NULL; return rc; } void NRFile_Destruct(nrfp) NRFilePtr nrfp; { fclose(nrfp->fp); nrfp->fp = NULL; nrfp->dbname[0] = nrfp->filename[0] = NULLB; nrfp->dbnamelen = 0; (void) free(nrfp); return; } Boolean NRFile_Isopen(nrfp) NRFilePtr nrfp; { return (nrfp->fp != NULL); } SeqStrPtr NRFile_Read(nrfp) NRFilePtr nrfp; { static SeqStr ss; static SeqName ssn; static size_t namemax; FILE *fp; register unsigned char *cp, *cpend; register unsigned char ch; register unsigned long hashval = 0; int i; ss.chain = NULL; ss.clen = 0; ss.name1 = &ssn; ssn.chain = NULL; ssn.nrfp = nrfp; fp = nrfp->fp; if (getfasta(xltab, &ssn.name, &namemax, &ssn.namelen, &seqbuf, &seqbufmax, &ss.seqlen, fp) != 0) return NULL; maxnamelen = MAX(maxnamelen, ssn.namelen); ss.seq = (CharPtr)seqbuf; if (ss.seqlen > 0) { cp = (unsigned char *)ss.seq; cpend = cp + ss.seqlen; while (cp < cpend) { /* sequence is not necessarily NUL-terminated */ if (nt_atob[ch = *cp++] > 3) break; hashval *= 1103515245; hashval += (unsigned long)ch + 12345; } if (ch) { do { if (nt_atob[ch] > 15) break; hashval *= 1103515245; hashval += (unsigned long)ch + 12345; } while (ch = *cp++); if (ch) { do { hashval *= 1103515245; hashval += (unsigned long)ch + 12345; } while (ch = *cp++); ss.compression = 0; } else compress4bits(&ss); } else compress2bits(&ss); } ss.id = seqcnt; hashsave(&ss, hashval); nrfp->numseqs++; nrfp->totres += ss.seqlen; if (ss.seqlen > maxseqlen) maxseqlen = ss.seqlen; return &ss; } static void compress2bits(ssp) SeqStrPtr ssp; { register unsigned char *cp, *ccp; register unsigned char ch; unsigned char *cpend, bch; cp = ccp = (unsigned char *)ssp->seq; cpend = ccp + ssp->seqlen / 4; if (ccp < cpend) { bch = (((( (nt_atob[cp[0]] << 2) | nt_atob[cp[1]]) << 2) | nt_atob[cp[2]]) << 2) | nt_atob[cp[3]]; ssp->seq[0] = bch; cp += 4; while (++ccp < cpend) { *ccp = (((( (nt_atob[cp[0]] << 2) | nt_atob[cp[1]]) << 2) | nt_atob[cp[2]]) << 2) | nt_atob[cp[3]]; cp += 4; } } if (ch = *cp++) { bch = nt_atob[ch] << 2; if (ch = *cp++) { bch |= nt_atob[ch]; bch <<= 2; if (ch = *cp++) bch |= nt_atob[ch]; bch <<= 2; } else bch <<= 4; *ccp++ = bch; } ssp->clen = ccp - (unsigned char *)ssp->seq; ssp->compression = 2; } static void compress4bits(ssp) SeqStrPtr ssp; { register unsigned char *cp, *ccp; register unsigned char ch; unsigned char *cpend, bch; cp = ccp = (unsigned char *)ssp->seq; cpend = ccp + ssp->seqlen / 2; if (ccp < cpend) { bch = (nt_atob[cp[0]] << 4) | nt_atob[cp[1]]; ssp->seq[0] = bch; cp += 2; while (++ccp < cpend) { *ccp = (nt_atob[cp[0]] << 4) | nt_atob[cp[1]]; cp += 2; } } if (ch = *cp++) *ccp++ = nt_atob[ch] << 4; ssp->clen = ccp - (unsigned char *)ssp->seq; ssp->compression = 4; } static void hashsave(sp, hashval) SeqStrPtr sp; unsigned long hashval; { register HashRayPtr newhrp; if (sp->id >= hashraymax) { hashraymax += 10000; hashray = (HashRayPtr)mem_realloc(hashray, sizeof(*hashray)*hashraymax); if (hashray == NULL) fatal(1, "Out of memory for hashray"); } hashray[sp->id].sp = sp; hashray[sp->id].hashval = hashval; hashray[sp->id].nextid = ULONG_MAX; return; }