/* PAMxT.c Jean-Michel Claverie SGI feb 1991 / March 1993 */ /* pamxt - Calculate a PAM to a given number; * - or/and a scoring matrix given a "transition" matrix * ----------------------------------------------------------------------- * The evolution model corresponding to this program is symetrical * [pam] [T] [pam] with the default option, * asymetrical [pam] [T] with the -a (asymetrical) option * ---------------------------------------------------------------------- * The command syntax is * * pamxt * or * pamxt -a * output file -> PAMxT.out * *---------------------------- IMPORTANT WARNING -------------------------- * The transition matrix must be given according the * ARNDCQEGHILKMFPSTWYV order ! *------------------------------------------------------------------------- * Relevant literature: * * Claverie, J-M. (1993) J. MOL. BIOLOGY 234: 1140-1157. * Detecting Frame Shifts by Amino Acid Sequence Comparison. * * Dayhoff, M. O., Schwartz, R. M. & Orcutt, B. C. (1978). * A model of evolutionary change in proteins. In Atlas of Protein Sequence * and Structure, Vol. 5, Suppl. 3, Ed. M. O. Dayhoff, pp. 345-352. * Natl. Biomed. Res. Found., Washington. * * Karlin, S. & Altschul, S., Methods for assessing the statistical significance * of molecular sequence features by using general scoring schemes * Proc. Nat. Acad. Sci. USA, 87, 2264-2268, 1990 */ /* compile with cc -lm */ #include #include #define DIM 20 #define MAXLINE 1000 #define MLIMIT 20 /* One letter amino acid code */ char aa[] = {"ARNDCQEGHILKMFPSTWYV"}; /* Dayhoff amino acid background frequencies */ double fq[DIM] = { 0.086, 0.041, 0.040, 0.047, 0.033, 0.038, 0.050, 0.089, 0.034, 0.037, 0.085, 0.081, 0.015, 0.040, 0.051, 0.070, 0.058, 0.010, 0.030, 0.065}; /* Dayhoff 1 PAM stochastic amino acid transition matrix */ int mdm[400] = { 9867, 2, 9, 10, 3, 8, 17, 21, 2, 6, 4, 2, 6, 2, 22, 35, 32, 0, 2, 18, 1, 9914, 1, 0, 1, 10, 0, 0, 10, 3, 1, 19, 4, 1, 4, 6, 1, 8, 0, 1, 4, 1, 9822, 36, 0, 4, 6, 6, 21, 3, 1, 13, 0, 1, 2, 20, 9, 1, 4, 1, 6, 0, 42, 9859, 0, 6, 53, 6, 4, 1, 0, 3, 0, 0, 1, 5, 3, 0, 0, 1, 1, 1, 0, 0, 9973, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 5, 1, 0, 3, 2, 3, 9, 4, 5, 0, 9876, 27, 1, 23, 1, 3, 6, 4, 0, 6, 2, 2, 0, 0, 1, 10, 0, 7, 56, 0, 35, 9865, 4, 2, 3, 1, 4, 1, 0, 3, 4, 2, 0, 1, 2, 21, 1, 12, 11, 1, 3, 7, 9935, 1, 0, 1, 2, 1, 1, 3, 21, 3, 0, 0, 5, 1, 8, 18, 3, 1, 20, 1, 0, 9913, 0, 1, 1, 0, 2, 3, 1, 1, 1, 4, 1, 2, 2, 3, 1, 2, 1, 2, 0, 0, 9871, 9, 2, 12, 7, 0, 1, 7, 0, 1, 33, 3, 1, 3, 0, 0, 6, 1, 1, 4, 22, 9947, 2, 45, 13, 3, 1, 3, 4, 2, 15, 2, 37, 25, 6, 0, 12, 7, 2, 2, 4, 1, 9924, 20, 0, 3, 8, 11, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0, 5, 8, 4, 9875, 1, 0, 1, 2, 0, 0, 4, 1, 1, 1, 0, 0, 0, 0, 1, 2, 8, 6, 0, 4, 9944, 0, 2, 1, 3, 28, 0, 13, 5, 2, 1, 1, 8, 3, 2, 5, 1, 2, 2, 1, 1, 9924, 12, 4, 0, 0, 2, 28, 11, 34, 7, 11, 4, 6, 16, 2, 2, 1, 7, 4, 3, 17, 9840, 38, 5, 2, 2, 22, 2, 13, 4, 1, 3, 2, 2, 1, 11, 2, 8, 6, 1, 5, 32, 9869, 0, 2, 9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 9976, 1, 0, 1, 0, 3, 0, 3, 0, 1, 0, 4, 1, 1, 0, 0, 21, 0, 1, 1, 2, 9947, 1, 13, 2, 1, 1, 3, 2, 2, 3, 3, 57, 11, 1, 17, 1, 3, 2, 10, 0, 2, 9901}; double pam0[DIM][DIM]; /* initial pam */ double store[DIM][DIM]; /* storage */ double pamn [DIM][DIM]; /* n th power of pam */ double T[DIM][DIM]; /* transition matrix */ double PM[DIM][DIM]; /* freq*pamn matrix */ double PP[DIM][DIM]; /* fq[i]*fq[j] */ double MAT[DIM+3][DIM+3]; /* Final Scoring matrix */ main(argc,argv) int argc; char *argv[]; { int i,j,k,n,N,size, option=0; double S, lambda; FILE *F; if (argc <2 || argc >4) { printf("\n USAGE %s number\n",argv[0]); printf (" %s number Transition_Matrix [-a]\n",argv[0]); exit(0);} if (1 != sscanf(argv[1], "%d", &N)) { printf("\n ERROR: pam number %s\n",argv[1]); exit(0); } if (argc==4) { if (!strcmp(argv[3],"-a")) option=1; else { printf("\n ERROR: option? %s\n",argv[3]); exit(0);} } /* compute the float transition probability matrix */ for (k=j=0; j 1 ? */ for (i=0; i store */ Msqmul(T,pamn,store,DIM); /* compute the T.Mij sum -> 1 ? */ for (S=0.,i=0; i TM */ Msqmul(PM,store,T,DIM); } else /* ASYMETRICAL [PAM]x [T] */ { /* compute MxT -> store */ Msqmul(pamn, T,store,DIM); /* compute final target frequency matrix T: P(i,i)MT(i,k) ; P: freq of i */ for (i=0; i 1 ? */ for (S=0.,i=0; i 1 ? */ for (S=0.,i=0; i0.001; lambda -= 0.1) { for (S=0.,i=0; i express as half-bit and -> DIM+2 */ for (i=0; i *pJ) ? *pI : *pJ ); /* return max dimension */ } /* ---------------------------------------------------------------------*/ /* fgt_line(Fin, max,line) JMC jan 1991 SGI */ /* #include */ /* function to input a line from an opened steam file into a char array,*/ /* with checking on the max length < max */ /* file "records" must be delimited by '\n' */ /* string is filled out until \n is encountered, or max-1 chars read */ /* or EOF occurs */ /* arguments: */ /* Fin: FILE pointer to input stream */ /* max: array dimension [], max length including the final \0 */ /* line: string or char * */ /* return: the number of usable chars in the string (\0 not included) */ /* : or max if capacity was exceeded, line contains */ /* max-1 usable characters. */ /* : 0 if NEW LINE was encountered first, line starts with '\0' */ /* :-1 if EOF was encountered, line starts with '\0' */ /* -------------------------------------------------------------------- */ fgt_line (Fin,max,line) int max; char line[]; FILE *Fin; { int c, i; for (i=0; (c=fgetc(Fin))!= EOF && c!='\n' && i<(max-1);i++) line[i]=c; line[i]='\0'; /* replace NEW LINE or last by '\0', don't count */ if (c==EOF) return(-1); /* EOF encountered */ if (i==(max-1) && c!='\n') { while (fgetc(Fin)!='\n'); /* clean buffer */ return(max); } /* flag overflow */ return (i); } /* sgt_line do the same things from a string */ /* ------------------------------------------------------------------- */ /* MDtransp(c) Jean-Michel Claverie feb 1991 SGI */ /* Module to transpose a double matrix within itself */ /* ARGUMENTS: */ /* IN: M, pI, pJ, : M[I][J] float matrix, dimension I, J */ /* OUT: transposed M, NEW VALUES for calling I and J */ /* ------------------------------------------------------------------- */ int MDtransp(M,pI,pJ) double M[][MLIMIT]; int *pI,*pJ; { int i,j,It,Jt; double v; Jt= (*pI); It= (*pJ); if (It>=Jt) { for (i=0 ; i< Jt; i++) for (j=i+1; j< It; j++) { v=M[j][i]; M[j][i]=M[i][j]; M[i][j]=v; } } else { for (j=0 ; j< It; j++) for (i=j+1; i< Jt; i++) { v=M[j][i]; M[j][i]=M[i][j]; M[i][j]=v; } } *pI=It; *pJ=Jt; } /* ------------------------------------------------------------------- */ /* int gt_1stword() Jean-Michel Claverie feb 1991 SGI */ /* module to read the first word of a closed string */ /* ARGUMENTS */ /* IN: line, a closed string */ /* OUT: word, a closed string, WITH sufficient space allocation */ /* RETURN: next: the relative position of the first characters NOT */ /* belonging to the first word in the string */ /* thus, no more word in the string returns 0 (false) */ /* Note : the next call can then use : line + next, as a pointer to */ /* initiate the search for the next 1st word in the string */ /* -------------------------------------------------------------------- */ int gt_1stword (line,word) char *line, *word; { char *p; for ( p=line; isspace(*p) ; p++); /* skeep leading blanks */ if (*p == '\0') return (0) ; /* no word in line */ while (!isspace(*p) && *p!='\0'){ *word =(*p) ; word++; p++;} *word='\0'; return (p-line); }