/* -------------------------------------------------------------------- */ /* Framscore.c Jean-Michel Claverie SGI March 93 */ /* Module to - read a genetic code with bias */ /* - compute its transition via frameshif */ /* - build the corresponding transition matrix */ /* -------------------------------------------------------------------- */ /***************** genetic code format **********************************/ /* anywhere on the line: ttt f 0.56 */ /* ttc f 0.44 */ /* atg m 1 */ /* line with less than 5 chars or more than 15 are comments */ /* if 64 codons have not been entered, error is signaled */ /************************************************************************/ #include #include #define MAXAA 22 #define MAXCODON 6 /* max number of codon per amino-acid */ main(argc,argv) int argc; char *argv[]; { char upper(); int cod_n(), dcod_n(), gt_gencod(), usage(); int size, i, j, k, rf, code, initcode, inew, jnew; double score[MAXAA][MAXAA], score2[MAXAA][MAXAA], val, S; char CODGEN[65],setc[MAXAA], codon[4]; char *COD, *FIND ; char result[MAXAA][65], *pr, *pr1; float bias[64]; float factor[MAXAA][64]; if (argc!=3) usage(argv[0]); rf=atoi(argv[2]); if(rf <-3 || rf>3 || rf==0) usage(argv[0]); if(64!=gt_gencod(argv[1],CODGEN,bias)) exit(0); /* get genetic code */ printf("\n Genetic code: %s\n\n",argv[1]); printf("\n %s\n",CODGEN); printf("\n modified through %d frameshift \n\n", rf); printf("\n"); size=atoms(CODGEN,setc); /* extract aa codes*/ for (i=0; i to any [j] */ for (j=0, S=0. ; j :[-3,-2,-1,1,2,3]>\n",name); exit(0); } /* -------------------------------------------------------------------- */ /* gt_gencod.c Jean-Michel Claverie SGI May 93 */ /* module to open, read and interpret a genetic code file */ /* arguments: input: fname = filename to open */ /* genetic code format: */ /* aaa k 0.55 */ /* output AAcod = string of length 65 with the */ /* bias: float array [64] codon bias */ /* residue 1-letter code at position cod_n(codon) -1 */ /* AAcod[65] is closed by AAcod[64]='\0' */ /* return: 64 if OK, number of codon succesfully read if error */ /* -------------------------------------------------------------------- */ /* genetic code text file format: */ /* <1-letter code as : ttt f 0.67 LOWER CASE !!! */ /* lines with <=5 of > 15 non-blank characters are comments */ /* -------------------------------------------------------------------- */ gt_gencod(fname,AAcod,bias) char AAcod[], *fname; float bias []; { int fgt_line(), cod_n(), cl_strg(); char line[100],codon[4]; FILE *IF; int i,c,size; if(NULL==(IF=fopen(fname,"r"))) { fprintf(stderr, "\n ERROR: while opening %s\n",fname); return(0);} for(i=0;i<64; ) /* no automatic increment */ { size=fgt_line(IF,100,line); if(size<0) break; /* EOF */ size=cl_strg(line); if(size >=5 && size <15 ) /* data line ? */ { codon[0]=line[0]; codon[1]=line[1]; codon[2]=line[2]; codon[3]='\0'; c=cod_n(codon,3)-1; /* cod_n() returns a non-zero code */ AAcod[c]=line[3]; if (1!=sscanf(&line[4],"%f", &bias[c])) { fprintf(stderr,"\n ERROR: bias format ?\n"); return (i); } i++; } } if (i<64) fprintf(stderr,"\n ERROR: while reading %s\n",fname); AAcod[64]='\0'; fclose(IF); return (i); /* return=64 is OK */ } /* -------------------------------------------------------------------- */ /* int dcod_n() JMC dec 1990 SGI */ /* this module decodes a code into the corresponding subsequence */ /* subsequence */ /* arguments: code, an integer value of the non-zero code */ /* : ktuple, an integer with the ktuple value >0 */ /* : seq, a char array contains the result of the decoding */ /* return : nothing */ /* The ktuple range is tested, NOT the coherence with the code value */ /* ktuple limit is 11 for nucleotides */ int dcod_n(code,ktuple,seq) int code, ktuple; char *seq; { static int val[11] = {1,4,16,64,256,1024,4096,16384,65536,262144,1048576}; char *n = "tcga"; /* allowed nucleotides */ int k, l[11]; if(ktuple<1 || ktuple >11){printf("\n ERROR ktuple value? dcod_n\n"); exit(0);} if(code<1) {printf("\n ERROR code value? dcod_n\n"); exit(0);} --code; /* get the real code */ for (k=ktuple-1; k>=0 ; k--) { l[k]=code/val[k]; code-=l[k]*val[k]; } for (k=0; kclosed string mode) */ /* output: the same char array, complemented */ /* return: size */ /* WARNING: complement is performed within the same array !! */ /* -------------------------------------------------------------------- */ int comp(array, size) char *array; int size; { int i; size= (size <1 ) ? -1 : size; for (i=0; *array!='\0' && i!=size; i++,array++) switch (*array) { case 'a': *array='t'; break; case 't': case 'u': *array='a'; break; case 'g': *array='c'; break; case 'c': *array='g'; break; case 'r': *array='y'; break; case 'y': *array='r'; break; default: break; } return i; } /* -------------------------------------------------------------------- */ /* rev.c Jean-Michel Claverie jan 91 SGI */ /* Module to reverse a char array of known size, or a closed string */ /* arguments: input: char array or closed string ('\0') */ /* input: integer size (0 or negative ->closed string mode) */ /* output: the same char array, reversed */ /* return: size */ /* WARNING: reversion is performed within the same array !! */ /* -------------------------------------------------------------------- */ int rev(array, size) char *array; int size; { char *pmin, *pmax, c; pmin=array; if (size<1){ for (pmax=array; *pmax!='\0'; pmax++); /* get to the end */ size=pmax-array; /* record size */ } for (pmax=array+size-1; pmax>pmin; pmax--,pmin++) { c= (*pmin); *pmin= (*pmax); *pmax=c; } return size; } /* cod_n.c JMC dec 1990 SGI */ /* This module compute the non-zero code of a (DNA) subsequence */ /* arguments : a *char pointer to the current position of the seq */ int cod_n(seq,ktuple) char *seq; int ktuple; { static int val[11] = {1,4,16,64,256,1024,4096,16384,65536,262144,1048576}; char *n = "tcga"; /* allowed nucleotides */ char *a; int i; int code = 0; int *v; if(ktuple>11 || ktuple<1){printf("\n ERROR ktuple value? cod_n\n"); exit(0);} for(i=0,v=val; i */ /* function to input a line from an opened steam file into a char array,*/ /* with checking on the max length < max */ /* file "records" must be delimited by '\n' */ /* string is filled out until \n is encountered, or max-1 chars read */ /* or EOF occurs */ /* arguments: */ /* Fin: FILE pointer to input stream */ /* max: array dimension [], max length including the final \0 */ /* line: string or char * */ /* return: the number of usable chars in the string (\0 not included) */ /* : or max if capacity was exceeded, line contains */ /* max-1 usable characters. */ /* : 0 if NEW LINE was encountered first, line starts with '\0' */ /* :-1 if EOF was encountered, line starts with '\0' */ /* -------------------------------------------------------------------- */ fgt_line (Fin,max,line) int max; char line[]; FILE *Fin; { int c, i; for (i=0; (c=fgetc(Fin))!= EOF && c!='\n' && i<(max-1);i++) line[i]=c; line[i]='\0'; /* replace NEW LINE or last by '\0', don't count */ if (c==EOF) return(-1); /* EOF encountered */ if (i==(max-1) && c!='\n') { while (fgetc(Fin)!='\n'); /* clean buffer */ return(max); } /* flag overflow */ return (i); } /* sgt_line do the same things from a string */ /* ---------------------------------------------------------------------*/ /* #include */ /* cl_strg.c JMC nov 1990 SGI */ /* for space char cleaning and compacting a string */ /* this module gets a string with a \0 delimiter in and */ /* 1) remove all "blank characters", */ /* 2) write a '\0' right after the last non-blank character, */ /* 3) return the size=number of usable non blank characters. */ /* Use this module to provide a bona fide file name */ /* argument IN: */ /* inarray, a string with a least a End-Of-String delimiter */ /* argument OUT: */ /* outarray,the same string with no leading, trailing or intervening */ /* blanks, ended by '\0'). */ /* return: */ /* the number of non blank characters, excluding the final '\0'. */ int cl_strg(inarray) char *inarray; { char *outarray; int size=0; outarray=inarray; /* we will work within the same string */ while(isspace((int)*inarray)) inarray++; /* skeep all leading blanks */ while(*inarray != '\0') /* to the end of the input string */ { if(!isspace((int)*inarray)) {*outarray= *inarray; outarray++; size++;} /* copy it */ inarray++; /* move forward, don't copy */ } *outarray='\0'; /* close the string */ return(size); /* return size */ } /* -------------------------------------------------------------------- */ /* int atoms() Jean-Michel Claverie SGI Jan 91 */ /* Module to extract the characters constituting an input string . */ /* arguments: */ /* input: the CLOSED string to be analyzed */ /* set: the CLOSED string of the constituting chars */ /* return: the number of different chars found in input */ /* the atoms are stored in set in the order of their first appearance */ /* in the input string. */ /* Null size string are permitted as long as they are properly closed */ /* -------------------------------------------------------------------- */ int atoms(input, set) char *input, *set; { int size; char *s; *set='\0'; for(size=0; *input !='\0'; input++) { for (s=set; (*input-*s) && (*s!='\0') ; s++); if (*s=='\0') /* new atom */ { *s= (*input); *(s+1)='\0'; size++; } } return size; } /* -------------------------------------------------------------------- */ /* upper() Jean-Michel Claverie SGI Jan 91 */ /* module to convert a character from lower (a) to upper (A) case */ /* argument: input : a char */ /* return: the converted char */ /* -------------------------------------------------------------------- */ char upper(c) char c; { if (c>='a' && c<='z') return (c + 'A' -'a'); else return(c); }