#include #include /****************************************************************************/ /* dfblast.c Jean-Michel Claverie feb 1992 */ /* module to read a BLAST output and to generate an annotated query */ /* USAGE: dfblast blast.output query [No_match_text] */ /* pipe > | dfblast + query [No_match_text] */ /* when No_match_text is NOT given, the defline is left as it is */ /****************************************************************************/ /* Please cite: Jean-Michel Claverie & David States (1993) */ /* Computers & Chemistry (in press) */ /****************************************************************************/ #define MAXLINE 1000 #define DEFLIMIT 1000 #define SEQLIMIT 500000 #define NAMELENGTH 5 #define NONEBLAST "*** NONE ***" main (argc, argv) int argc; char *argv[]; { int fgt_line(), gt_1stword(), gt_fasta(), compact_strg(); FILE *FIN, *QRY; char line[MAXLINE], *pline, word[MAXLINE]; char defline[DEFLIMIT], seq[SEQLIMIT], nonetext[DEFLIMIT]; int c, i, size, as_it_is=0; /*********************** test arguments and usages *************************/ if(argc< 3 || argc >4 ) {fprintf(stderr,"\n USAGE: %s blast.ouput query [No_match_text] \n",argv[0]); fprintf(stderr,"\n pipe | %s + query [No_match_text] \n",argv[0]); exit(0);} /************************ fopen and tests ********************************/ if (*argv[1]=='+') FIN=stdin; else if (NULL==(FIN=fopen(argv[1],"r"))) { fprintf(stderr," ERROR: opening %s ?\n",argv[1]); exit(0);} if (NULL==(QRY=fopen(argv[2],"r"))) { fprintf(stderr," ERROR: opening %s ?\n",argv[2]); exit(0);} if (argc==4) strcpy(nonetext,argv[3]); /* what to write in case of no match */ /**************************** get fasta sequence in *************************/ if (2>=(size=gt_1stfasta(QRY, defline, seq))) { fprintf(stderr," ERROR: query %s ?\n",argv[2]); exit(0);} /****************************************************************************/ while (1) /* skipping until Sequences */ { c=fgt_line(FIN,MAXLINE,line); if 
(c==EOF) { fprintf(stderr," ERROR: bad BLAST output %s ?\n",argv[1]); exit(0); } gt_1stword (line,word); if(!strcmp("Sequences",word)) break; /* found Sequences */ } while (isspace(c=fgetc(FIN))); /* Looking for Query line */ if (c==EOF) { fprintf(stderr," ERROR: bad BLAST output %s ?\n",argv[1]); exit(0); } *line = (char)c; fgt_line(FIN,MAXLINE-1,line+1); if (!strcmp(line,NONEBLAST)) { if (argc == 3) as_it_is =1; else strcpy(line,nonetext); } /************************ isolate SeqId ******************************/ c= gt_1stword (defline,word); if (*word != '>') /* bad Seq Id */ {fprintf(stderr," ERROR: defline %s ?\n",defline); exit(0); } if (cl_strg(word)==1) /* blank after > */ { c= gt_1stword (defline+c, word+1); if (cl_strg(word) */ /* function to input a line from an opened steam file into a char array,*/ /* with checking on the max length < max */ /* file "records" must be delimited by '\n' */ /* string is filled out until \n is encountered, or max-1 chars read */ /* or EOF occurs */ /* arguments: */ /* Fin: FILE pointer to input stream */ /* max: array dimension [], max length including the final \0 */ /* line: string or char * */ /* return: the number of usable chars in the string (\0 not included) */ /* : or max if capacity was exceeded, line contains */ /* max-1 usable characters. 
/* fgt_line return codes in short: n chars, max on overflow, -1 on EOF   */

/* -------------------------------------------------------------------- */
/* int fgt_line()                                                        */
/* Read one '\n'-delimited record from an open stream into a char array. */
/* ARGUMENTS                                                             */
/*   Fin : FILE pointer to the input stream                              */
/*   max : dimension of line[], final '\0' included (must be >= 1)       */
/*   line: destination array, always closed with '\0' on return          */
/* RETURN                                                                */
/*   n   : number of usable chars stored (0 for an empty line)           */
/*   max : the record was longer than max-1 chars; line holds the first  */
/*         max-1 chars and the rest of the record has been discarded     */
/*   -1  : EOF was met before a NEW LINE; line holds whatever was read   */
/*         before EOF (possibly nothing)                                 */
/* -------------------------------------------------------------------- */
int fgt_line(FILE *Fin, int max, char line[])
{
    int c;
    int i = 0;

    while ((c = fgetc(Fin)) != EOF && c != '\n' && i < max - 1)
        line[i++] = (char)c;
    line[i] = '\0';             /* close the string, '\n' is not stored */

    if (c == EOF)
        return -1;

    if (i == max - 1 && c != '\n') {
        /* Capacity exceeded: throw away the tail of the record.  The   */
        /* drain must also stop at EOF, otherwise an over-long last     */
        /* line without a final NEW LINE would loop forever.            */
        while ((c = fgetc(Fin)) != '\n' && c != EOF)
            ;
        return max;
    }
    return i;
}

/* sgt_line does the same thing from a string */

/* -------------------------------------------------------------------- */
/* int gt_1stword()                                                      */
/* Copy the first blank-delimited word of a closed string.               */
/* ARGUMENTS                                                             */
/*   IN : line, a closed string                                          */
/*   OUT: word, a closed string; the caller provides enough room         */
/*        (at most strlen(line)+1 chars are written)                     */
/* RETURN                                                                */
/*   the offset in line of the first char that does NOT belong to the    */
/*   first word, so that line + offset can be used for the next call;    */
/*   0 when the string holds no word, in which case word is set to the   */
/*   empty string so that the caller never reads stale data.             */
/* -------------------------------------------------------------------- */
int gt_1stword(char *line, char *word)
{
    char *p = line;

    /* <ctype.h> wants an unsigned char value: cast before every call */
    while (isspace((unsigned char)*p))
        p++;                                    /* skip leading blanks */

    if (*p == '\0') {
        *word = '\0';                           /* no word in line */
        return 0;
    }

    while (*p != '\0' && !isspace((unsigned char)*p))
        *word++ = *p++;
    *word = '\0';

    return (int)(p - line);
}

/* Fallback values, only used when this code is compiled on its own;    */
/* the definitions at the top of the file take precedence.              */
#ifndef DEFLIMIT
#define DEFLIMIT 1000
#endif
#ifndef SEQLIMIT
#define SEQLIMIT 500000
#endif

/* ******************************************************************** */
/* int gt_1stfasta()                                                     */
/* Load the next definition line and sequence from a fasta format file.  */
/* ARGUMENTS                                                             */
/*   Fin    : "r" opened fasta file, positioned on a '>'                 */
/*   defline: array of DEFLIMIT chars; receives the definition line,     */
/*            '>' included, truncated to DEFLIMIT-1 chars, closed        */
/*   seq    : array of SEQLIMIT chars; receives the sequence with all    */
/*            blank chars removed, case untouched, closed                */
/* RETURN                                                                */
/*   the sequence size, or EOF (-1) when the file holds no more entry.   */
/* The sequence ends at EOF or at the next '>' (wherever it appears),    */
/* which is pushed back so that the next call starts on it.              */
/* A missing '>', an EOF inside the definition line, or a sequence       */
/* longer than SEQLIMIT-1 print a message on stderr and stop the         */
/* program.                                                              */
/* NOTE(review): these error exits use status 0, like the exits visible  */
/* in main; left unchanged here so that callers see the same status.     */
/* ******************************************************************** */
int gt_1stfasta(FILE *Fin, char *defline, char *seq)
{
    int i, c, size;
    char *in;

    c = fgetc(Fin);
    if (c == EOF)
        return c;                               /* end of fasta file */
    if (c != '>') {
        fprintf(stderr,"\n ERROR: missing '>' in def line\n");
        exit(0);
    }

    /* definition line: keep at most DEFLIMIT-2 chars after the '>' */
    *defline = '>';
    for (i = DEFLIMIT - 2, in = defline + 1; i; i--, in++) {
        c = fgetc(Fin);
        if (c == EOF) {
            fprintf(stderr,"\n ERROR: unexpected EOF in def line\n");
            exit(0);
        }
        if (c == '\n')
            break;                              /* end of def line */
        *in = (char)c;
    }
    *in = '\0';     /* close now: the message below prints defline */

    if (!i) {
        /* DEFLIMIT was reached, skip the rest of the line */
        while ((c = fgetc(Fin)) != EOF && c != '\n')
            ;
        if (c == EOF) {
            fprintf(stderr,"\n ERROR: unexpected EOF in def line %s\n",
                    defline);
            exit(0);
        }
    }

    /* sequence: up to EOF, the next '>' or SEQLIMIT-1 residues */
    size = 0;
    while ((c = fgetc(Fin)) != EOF && c != '>') {
        if (isspace(c))
            continue;
        /* test before storing, so that exactly SEQLIMIT-1 residues    */
        /* (plus the final '\0') are accepted                          */
        if (size >= SEQLIMIT - 1) {
            fprintf (stderr,"\n ERROR: seq overflow %s %d \n", defline, size);
            exit(0);
        }
        *seq++ = (char)c;
        size++;
    }
    *seq = '\0';
    ungetc(c, Fin);         /* give the '>' back; a no-op when c is EOF */

    return size;
}

/* -------------------------------------------------------------------- */
/* int cl_strg()                                                         */
/* Remove every blank character from a closed string, in place.          */
/* ARGUMENT                                                              */
/*   inarray: a closed string; on return it holds the same characters    */
/*            without leading, trailing or intervening blanks, and is    */
/*            closed right after the last non-blank character            */
/* RETURN                                                                */
/*   the number of non-blank characters, final '\0' excluded.            */
/* -------------------------------------------------------------------- */
int cl_strg(char *inarray)
{
    char *dst = inarray;        /* write position, never ahead of the read */
    int size = 0;

    for (; *inarray != '\0'; inarray++) {
        if (!isspace((unsigned char)*inarray)) {
            *dst++ = *inarray;
            size++;
        }
    }
    *dst = '\0';

    return size;
}