/* distalign.c
 * PDB Distance to MSF Alignment
 * Roger Sayle & Mansoor Saqi
 * January 1996
 * Version 1.2
 */

#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Boolean constants, defined only if the environment has not already done so. */
#ifndef True
#define True  1
#define False 0
#endif

/* When IBMPC is not defined, the far-heap allocator names map onto the
 * ordinary malloc/free and the __huge/__far pointer qualifiers expand
 * to nothing.
 */
#ifndef IBMPC
#define _fmalloc  malloc
#define _ffree    free
#define __huge
#define __far
#endif

/* Natural logarithm of 2; EntropyScore divides log() by this to get log base 2. */
#define LogTwo  0.69314718


/* One row of the alignment, held in a singly linked list. */
typedef struct _SeqRecord {
        struct _SeqRecord *next;  /* next row in the list, NULL at the end */
        char __far *seq;          /* alignlen bytes of residue letters and '.' gaps; not NUL terminated */
        char name[16];            /* row name; NOT NUL terminated when all 16 bytes are used */
        double weight;            /* value of the "Weight:" field, 0.0 if absent */
        int namelen;              /* number of significant characters in name (0..16) */
        int len;                  /* number of characters stored in seq so far */
    } SeqRecord;


/* Head of the alignment row list built by ParseMSFFile; main() later
 * replaces it with SeqFirst so the generated rows are written first.
 */
static SeqRecord *SeqData;
/* Head of the output list; the Create...Sequence functions prepend to it. */
static SeqRecord *SeqFirst;
/* Per-residue arrays filled by ParsePDBFile, each with room for alignlen
 * entries: one-letter residue code, distance character, and residue
 * number offset relative to the first residue read.
 */
static char __far *Sequence;
static char __far *Distance;
static int __far *Offset;
/* Largest value stored in Offset, and the chain identifier being read
 * (0 until the first recognised residue fixes it).
 */
static int MaxOffset,Chain;

/* Result of FindPDBSequence: the first matching row and the column at
 * which the match starts.  QueryLen is the number of PDB residues read.
 */
static SeqRecord *QueryPtr;
static int QueryLen;
static int QueryPos;

/* Values read from the MSF header and its " Name:" lines. */
static double gapweight;
static double gaplenwgt;
static int aligncount;   /* number of " Name:" entries, i.e. rows */
static int alignlen;     /* alignment length taken from the "Len:" field */

/* Command line settings, see ProcessCommandLine. */
static double Threshold;
static int ZvelebilFlag;
static int PairwiseFlag;
static int EntropyFlag;

/* File names from the command line; outname is "-" for standard output. */
static char *msfname;
static char *pdbname;
static char *outname;


/* Read the next non-empty line of `file` into `buffer`.
 *
 * At most 80 characters are stored, followed by a NUL; anything beyond
 * that on the same line is discarded.  Both '\n' and '\r' end a line and
 * empty lines are skipped.  Returns False, with an empty buffer, only
 * when the stream was already at end-of-file on entry; hitting EOF while
 * reading returns True with whatever was collected (possibly nothing).
 */
static int ReadLine( file, buffer )
    FILE *file; char *buffer;
{
    register int count;
    register int ch;

    if( feof(file) )
    {   buffer[0] = 0;
        return( False );
    }

    count = 0;
    while( count < 80 )
    {   ch = getc(file);
        if( ch==EOF )
        {   buffer[count] = 0;
            return( True );
        }

        if( (ch=='\n') || (ch=='\r') )
        {   /* A terminator on an empty line is simply ignored */
            if( !count )
                continue;
            buffer[count] = 0;
            return( True );
        }
        buffer[count++] = ch;
    }
    buffer[count] = 0;

    /* Line was too long: discard the remainder of it */
    for(;;)
    {   ch = getc(file);
        if( (ch=='\n') || (ch=='\r') || (ch==EOF) )
            break;
    }
    return( True );
}


/* Read the MSF alignment named by `msfname` into the SeqData list.
 *
 * Sets gapweight/gaplenwgt (GCG defaults 3.0 and 0.1 unless the header
 * overrides them), aligncount and alignlen, and creates one SeqRecord
 * per " Name:" line.  alignlen becomes the largest "Len:" value seen.
 * Every sequence buffer holds alignlen characters and is pre-filled with
 * '.' so rows shorter than the alignment read as gaps.  Residues are
 * stored in upper case; both '.' and '-' are stored as '.'.
 *
 * Exits with a message if the file cannot be opened, memory runs out,
 * no sequences are declared or no positive length is declared.
 */
static void ParseMSFFile()
{
    static char buffer[82];
    register SeqRecord **prev;
    register SeqRecord *ptr;
    register FILE *infile;
    register char *cptr;
    register int i, ch;
    int len;

    if( !(infile=fopen(msfname,"r")) )
    {   fprintf(stderr,"Error: Unable to open MSF file %s!\n",msfname);
        exit(1);
    }

    /* GCG defaults! */
    gapweight = 3.0;
    gaplenwgt = 0.1;

    /* Header: locate the keywords wherever they appear on the line
     * instead of relying on a fixed column layout.
     */
    while( ReadLine(infile,buffer) && strncmp(buffer," Name:",6) )
    {   if( (cptr=strstr(buffer,"GapWeight:")) != NULL )
            sscanf(cptr+10,"%lg",&gapweight);
        if( (cptr=strstr(buffer,"GapLengthWeight:")) != NULL )
            sscanf(cptr+16,"%lg",&gaplenwgt);
    }

    SeqData = NULL;
    aligncount = 0;
    alignlen = 0;
    prev = &SeqData;

    while( !strncmp(buffer," Name:",6) )
    {   if( !(ptr=(SeqRecord*)malloc(sizeof(SeqRecord))) )
        {   fputs("Error: Unable to allocate memory!\n",stderr);
            exit(1);
        }
        *prev = ptr;  prev = &ptr->next;

        ptr->next = NULL;
        ptr->seq = NULL;
        ptr->weight = 0.0;
        ptr->len = 0;
        aligncount++;

        /* The name starts in column 7 and ends at the first space, the
         * end of the line, or after 16 characters.
         */
        cptr = buffer+6;
        if( *cptr ) cptr++;
        for( i=0; i<16; i++ )
        {   ch = cptr[i];
            if( !ch || (ch==' ') )
                break;
            ptr->name[i] = ch;
        }
        ptr->namelen = i;
        while( i<16 )
            ptr->name[i++] = '\0';
        cptr += ptr->namelen;

        /* Remaining fields are looked up by keyword after the name */
        {   register char *field;

            if( (field=strstr(cptr,"Len: ")) != NULL )
                if( (sscanf(field+5,"%d",&len)==1) && (len>alignlen) )
                    alignlen = len;

            if( (field=strstr(cptr,"Weight: ")) != NULL )
                sscanf(field+8,"%lg",&ptr->weight);
        }

        if( !ReadLine(infile,buffer) )
            break;
    }

    while( strncmp(buffer,"//",2) )
        if( !ReadLine(infile,buffer) )
            break;

    if( !aligncount )
    {   fputs("Error: No alignment sequences found!\n",stderr); exit(1);
    } else fprintf(stderr,"Number of sequences ...... %d\n",aligncount);
    fprintf(stderr,"Alignment length ......... %d\n",alignlen);

    if( alignlen <= 0 )
    {   fputs("Error: Invalid alignment length!\n",stderr);
        exit(1);
    }

    /* Allocate only now, when the final alignment length is known, so
     * every row really has room for alignlen characters.
     */
    for( ptr=SeqData; ptr; ptr=ptr->next )
    {   if( !(ptr->seq = (char __far*)_fmalloc(alignlen)) )
        {   fputs("Error: Unable to allocate sequence memory!\n",stderr);
            exit(1);
        }
        for( i=0; i<alignlen; i++ )
            ptr->seq[i] = '.';
    }

    if( !ReadLine(infile,buffer) )
    {   fclose(infile);
        return;
    }

    while( True )
    {   /* Skip lines (e.g. column rulers) carrying no sequence data */
        while( True )
        {   for( cptr=buffer; *cptr; cptr++ )
                if( isalpha((unsigned char)*cptr) ||
                    (*cptr=='.') || (*cptr=='-') )
                    break;
            if( *cptr || !ReadLine(infile,buffer) )
                break;
        }

        if( !buffer[0] ) break;

        /* One line per sequence, in the order of the " Name:" lines */
        for( ptr=SeqData; ptr; ptr=ptr->next )
        {   /* Skip sequence name! */
            cptr = buffer;
            while( *cptr && *cptr==' ' )
                cptr++;

            while( *cptr && *cptr!=' ' )
                cptr++;

            while( (ch = (unsigned char)*cptr++) != 0 )
            {   if( islower(ch) )
                    ch = toupper(ch);

                if( (ch>='A') && (ch<='Z') )
                {   if( ptr->len < alignlen )
                        ptr->seq[ptr->len++] = ch;
                } else if( (ch=='.') || (ch=='-') )
                {   if( ptr->len < alignlen )
                        ptr->seq[ptr->len++] = '.';
                }
            }

            if( !ReadLine(infile,buffer) )
                break;
        }
    }
    fclose(infile);
}


/* Translate the three-letter residue name starting at `ptr` into its
 * one-letter code.  Only upper-case names are recognised.  Besides the
 * twenty standard residues the table knows ASX (B), GLX (Z), the
 * alternative names TRY, CYH, CSH and CSM, and maps PCA, HYP and CPR
 * to proline.  Returns False (0) for anything else.
 */
static int IdentifyResidue( ptr )
    char *ptr;
{
    static struct {
            char *name;
            char code;
        } known[] = {
            { "ALA", 'A' }, { "ASP", 'D' }, { "ASN", 'N' }, { "ASX", 'B' },
            { "ARG", 'R' },
            { "GLY", 'G' }, { "GLU", 'E' }, { "GLN", 'Q' }, { "GLX", 'Z' },
            { "LEU", 'L' }, { "LYS", 'K' },
            { "SER", 'S' },
            { "VAL", 'V' },
            { "THR", 'T' }, { "TYR", 'Y' }, { "TRP", 'W' }, { "TRY", 'W' },
            { "ILE", 'I' },
            { "PRO", 'P' }, { "PHE", 'F' }, { "PCA", 'P' },
            { "HIS", 'H' }, { "HYP", 'P' },   /* hydroxyproline */
            { "CYS", 'C' }, { "CYH", 'C' }, { "CSH", 'C' }, { "CSM", 'C' },
            { "CPR", 'P' },                   /* cis-proline */
            { "MET", 'M' }
        };
    register int n;

    for( n=0; n<(int)(sizeof(known)/sizeof(known[0])); n++ )
        if( !strncmp(ptr,known[n].name,3) )
            return( known[n].code );
    return( False );
}

/* Parse a floating point number from a fixed-width field of up to six
 * characters starting at `ptr`.  Copying stops early at a NUL so that a
 * short string is never read past its terminator.  Returns 0.0 when
 * the field does not hold a number.
 */
static double GetPDBFloat( ptr )
    char *ptr;
{
    char temp[8];
    double dist;
    register int i;

    for( i=0; (i<6) && ptr[i]; i++ )
        temp[i] = ptr[i];
    temp[i] = '\0';

    /* dist keeps its initial value if sscanf converts nothing */
    dist = 0.0;
    sscanf(temp,"%lg",&dist);
    return(dist);
}


/* Encode a distance as a single character relative to Threshold.
 *
 * Returns ' ' when dist is not below Threshold (this includes NaN),
 * otherwise a digit '0'..'9' equal to floor(10*dist/Threshold).
 * Values at or below zero give '0', and the digit is capped at '9' so
 * floating point rounding right below the threshold cannot produce a
 * character outside the digit range.
 */
static char GetDistanceChar( dist )
    double dist;
{
    register int digit;

    if( !(dist < Threshold) )
        return(' ');
    if( dist <= 0.0 )
        return('0');

    digit = (int)floor((10.0*dist)/Threshold);
    if( digit > 9 )
        digit = 9;
    return( (char)(digit + '0') );
}

/* Read the residues of one chain from the PDB file named by `pdbname`.
 *
 * Only ATOM records are used, and only those of the chain of the first
 * recognised residue.  For each residue the function records:
 *   Sequence[k]  one-letter code from IdentifyResidue,
 *   Offset[k]    residue number relative to the first residue (a change
 *                of insertion code on the same number counts as a step),
 *   Distance[k]  GetDistanceChar of the smallest value found in columns
 *                61-66 over the residue's atoms.
 * QueryLen, MaxOffset and Chain are set as a side effect.  Reading stops
 * at an END or ENDMDL record.  Exits with a message if the file cannot
 * be opened, memory runs out, or the chain has more than alignlen
 * residues.
 */
static void ParsePDBFile()
{
    register FILE *infile;
    register int prev,init,known;
    register double dist;
    register double best;
    register char insert;
    register char code;
    register int start;
    register int i;
    char buffer[82];
    int resno;

    if( !(infile=fopen(pdbname,"r")) )
    {   fprintf(stderr,"Error: Unable to open PDB file %s!\n",pdbname);
        exit(1);
    }

    Offset = (int __far*)_fmalloc(alignlen*sizeof(int));
    Sequence = (char __far*)_fmalloc(alignlen);
    Distance = (char __far*)_fmalloc(alignlen);
    if( !Offset || !Sequence || !Distance )
    {   fputs("Error: Unable to allocate memory!\n",stderr);
        exit(1);
    }

    init = False;  known = False;
    insert = ' ';
    prev = 0;  start = 0;  best = 0.0;
    Chain = 0;  QueryLen = 0;  MaxOffset = 0;
    while( ReadLine(infile,buffer) )
    {   if( !strncmp(buffer,"ATOM",4) )
        {   /* Pad short records with blanks so that every fixed column
             * read below is defined (buffer is not initialised).
             */
            for( i=(int)strlen(buffer); i<80; i++ )
                buffer[i] = ' ';
            buffer[80] = '\0';

            if( Chain && (Chain!=buffer[21]) )
                continue;

            /* !strncmp(buffer+12," CA ",4) */
            if( sscanf(buffer+22,"%d",&resno) != 1 )
                continue;    /* no residue number: ignore the record */

            if( init && (resno==prev) )
            {   if( insert == buffer[26] )
                {   /* Another atom of the current residue.  Atoms of a
                     * residue that was not recognised must not alter
                     * the distance of the previous recorded residue.
                     */
                    if( known )
                    {   dist = GetPDBFloat(buffer+60);
                        if( dist < best )
                        {   Distance[QueryLen-1] = GetDistanceChar(dist);
                            best = dist;
                        }
                    }
                    continue;
                }
                /* Same number, new insertion code: shift the origin so
                 * the inserted residue gets the next offset.
                 */
                start--;
            }

            code = IdentifyResidue(buffer+17);
            known = (code != 0);
            if( known )
            {   if( !init )
                {   if( !Chain ) Chain = buffer[21];
                    start = resno;  init = True;
                } else if( resno-start > MaxOffset )
                    MaxOffset = resno-start;

                if( QueryLen<alignlen )
                {   best = GetPDBFloat(buffer+60);
                    Offset[QueryLen] = resno-start;
                    Distance[QueryLen] = GetDistanceChar(best);
                    Sequence[QueryLen] = code;
                    QueryLen++;

                } else /* Overflow! */
                {   fputs("Error: PDB sequence too long!\n",stderr);
                    exit(1);
                }
            }
            insert = buffer[26];
            prev = resno;

        } else if( !strncmp(buffer,"END ",4) ||
                   !strncmp(buffer,"ENDM",4) )
            break;
    }
    fprintf(stderr,"PDB sequence length ...... %d\n",MaxOffset);
    fprintf(stderr,"Number of PDB residues ... %d\n",QueryLen);
    fclose(infile);
}


/* Return the index of the first non-gap character of `ptr` at or after
 * `pos`, or alignlen when only gaps (or nothing) remain before ptr->len.
 */
static int SkipGap( ptr, pos )
    register SeqRecord *ptr;
    register int pos;
{
    for( ; pos < ptr->len; pos++ )
        if( ptr->seq[pos] != '.' )
            return( pos );
    return( alignlen );
}


/* Decide whether two one-letter residue codes are compatible.
 *
 * Equal codes always match.  An 'X' in the FIRST argument matches
 * anything (an 'X' in the second does not).  'B' on either side matches
 * 'D' or 'N', and 'Z' on either side matches 'E' or 'Q'; an ambiguity
 * code in the first argument is decisive and the second is then not
 * consulted.  Returns True or False.
 */
static int MatchAmino( ch1, ch2 )
    register int ch1, ch2;
{
    if( ch1 == ch2 )
        return( True );

    switch( ch1 )
    {   case('X'):  return( True );
        case('B'):  return( (ch2=='D') || (ch2=='N') );
        case('Z'):  return( (ch2=='E') || (ch2=='Q') );
    }

    switch( ch2 )
    {   case('B'):  return( (ch1=='D') || (ch1=='N') );
        case('Z'):  return( (ch1=='E') || (ch1=='Q') );
    }
    return( False );
}


static int FindPDBSequence()
{
    register SeqRecord *ptr;
    register int count, start;
    register int previ;
    register int i, j, k;
    register char ch;

    count = 0;
    for( ptr=SeqData; ptr; ptr=ptr->next )
    {   start = SkipGap(ptr,0);
        while( start+MaxOffset < alignlen )
        {    i = start;  j = 0;
             for( k=0; k<QueryLen; k++ )
             {   previ = SkipGap(ptr,i+1);
                 while( j < Offset[k] )
                 {   i = SkipGap(ptr,i+1);
                     j++;
                 }

                 if( !MatchAmino(ptr->seq[i],Sequence[k]) )
                 {   if( MatchAmino(ptr->seq[previ],Sequence[k]) )
                     {   i = previ;
                     } else break;
                 }
             }

             if( k==QueryLen )
             {   if( !count )
                 {   QueryPos = start;
                     QueryPtr = ptr;
                     count = 1;
                 } else count++;
                 break;
             } else if( k > 10 )
             {   fprintf(stderr,"Match of %d at %d in %s\n",k,start,ptr->name);
                 fprintf(stderr,"  residue %c didn't match %c or %c\n",
                                Sequence[k],ptr->seq[i],ptr->seq[previ]);
             }

             start = SkipGap(ptr,start+1);
        }
    }
    return( count );
}


/* Build the "Distance" row and prepend it to the SeqFirst list.
 *
 * The row is alignlen characters of '.', with Distance[k] written at
 * the column where PDB residue k lies in the matched row QueryPtr.  The
 * columns are found with the same walk FindPDBSequence used for the
 * match: Offset[k] non-gap positions from QueryPos, falling back to the
 * residue directly after the previous one when that position does not
 * match.  Exits with a message if memory runs out.
 */
static void CreateDistanceSequence()
{
    register SeqRecord *ptr;
    register int i,j,k;
    register int previ;

    if( !(ptr=(SeqRecord*)malloc(sizeof(SeqRecord))) )
    {   fputs("Error: Unable to allocate memory!\n",stderr);
        exit(1);
    }

    strcpy(ptr->name,"Distance");
    ptr->namelen = 8;
    ptr->weight = 1.0;
    ptr->len = alignlen;

    if( !(ptr->seq=(char*)malloc(alignlen)) )
    {   fputs("Error: Unable to allocate sequence memory!\n",stderr);
        exit(1);
    }

    /* Initialise Distance! */
    for( i=0; i<alignlen; i++ )
        ptr->seq[i] = '.';

    i = QueryPos;  j = 0;
    for( k=0; k<QueryLen; k++ )
    {   previ = i;
        while( j < Offset[k] )
        {   i = SkipGap(QueryPtr,i+1);
            j++;
        }

        /* The fallback must land on the next residue, not merely the
         * next column (which may be a gap), to stay in step with the
         * positions FindPDBSequence matched.
         */
        if( (i>=QueryPtr->len) ||
            !MatchAmino(QueryPtr->seq[i],Sequence[k]) )
            i = SkipGap(QueryPtr,previ+1);

        if( i >= alignlen )
            break;    /* ran off the row: nothing further can be placed */
        ptr->seq[i] = Distance[k];
    }
    ptr->next = SeqFirst;
    SeqFirst = ptr;
}


/* Map a one-letter residue code to its row in AminoProps.
 *
 * The twenty standard residues occupy 0..19 in the order
 * A R N D C Q E G H I L K M F P S T W Y V, followed by B (20), Z (21),
 * X (22) and the gap character '.' (23).  Any other value, including
 * lower-case letters and 0, is treated as X and yields 22.
 */
static int GetAminoIndex( ch )
    int ch;
{
    static const char order[] = "ARNDCQEGHILKMFPSTWYVBZX.";
    register int idx;

    /* The loop stops at the terminator, so ch==0 never matches */
    for( idx=0; order[idx]; idx++ )
        if( order[idx] == ch )
            return( idx );
    return( 22 );  /* X */
}


/* Shannon entropy, in bits, of the residue distribution in alignment
 * column `pos`.
 *
 * Only the twenty standard residues are counted; B, Z, X, gaps and rows
 * that do not reach `pos` are ignored.  The result is the negated sum of
 * p*log2(p) over the residues present.
 */
static double EntropyScore( pos )
    int pos;
{
    static int bin[20];
    register SeqRecord *row;
    register double sum;
    register double pi,logpi;
    register int total;
    register int n;

    n = 0;
    while( n<20 )
        bin[n++] = 0;

    /* Histogram of the standard residues in this column */
    total = 0;
    for( row=SeqData; row; row=row->next )
    {   if( pos >= row->len )
            continue;

        n = GetAminoIndex( row->seq[pos] );
        if( n < 20 )
        {   bin[n]++;
            total++;
        }
    }

    sum = 0.0;
    for( n=0; n<20; n++ )
    {   if( !bin[n] )
            continue;

        pi = (double)bin[n]/(double)total;
        logpi = log(pi)/LogTwo;
        sum += pi*logpi;
    }
    return( -sum );
}


/* Fraction of rows whose residue in column `pos` does NOT match the
 * residue of the query row (QueryPtr) in that column, using MatchAmino
 * with the query residue as first argument.  Rows that do not reach
 * `pos` are left out of both the numerator and the denominator.
 */
static double ConservationScore( pos )
    int pos;
{
    register SeqRecord *row;
    register int matched,total;
    register int query;

    matched = 0;
    total = 0;
    query = QueryPtr->seq[pos];

    for( row=SeqData; row; row=row->next )
    {   if( pos >= row->len )
            continue;

        total++;
        if( MatchAmino(query,row->seq[pos]) )
            matched++;
    }
    return( 1.0-(double)matched/total );
}


/* Fraction of row pairs whose residues in column `pos` do NOT match
 * (per MatchAmino, earlier row as first argument).  Only pairs in which
 * both rows reach `pos` are considered.
 *
 * Returns 0.0 when there is no such pair (for instance with a single
 * row) instead of dividing zero by zero.
 */
static double PairwiseScore( pos )
    int pos;
{
    register SeqRecord *ptr1;
    register SeqRecord *ptr2;
    register int score,count;
    register int ch;

    score = count = 0;
    for( ptr1=SeqData; ptr1; ptr1=ptr1->next )
        if( pos < ptr1->len )
        {   ch = ptr1->seq[pos];
            for( ptr2=ptr1->next; ptr2; ptr2=ptr2->next )
                if( pos < ptr2->len )
                {   if( MatchAmino(ch,ptr2->seq[pos]) )
                        score++;
                    count++;
                }
        }

    /* No pair to compare: nothing can differ */
    if( !count )
        return( 0.0 );
    return( 1.0-(double)score/count );
}


/* Implications between the property flags below ("->" reads "implies"): */
/* Note: APTiny     -> APSmall   */
/*       APPositive -> APCharged */
/*       APNegative -> APCharged */
/*       APProline  -> APSmall   */

/* One bit per residue property; APAllProp is the union of all ten. */
#define APHydrophobic  0x001
#define APPositive     0x002
#define APNegative     0x004
#define APPolar        0x008
#define APCharged      0x010
#define APSmall        0x020
#define APTiny         0x040
#define APAliphatic    0x080
#define APAromatic     0x100
#define APProline      0x200
#define APAllProp      0x3FF

/* The following table is derived from Table 2, "Properties of
 * Amino Acid Residues" in M.Zvelebil, G.J.Barton, W.R.Taylor,
 * M.J.E.Sternberg, "Prediction of Protein Secondary Structure
 * and Active Sites using the Alignment of Homologous Proteins",
 * J.Mol.Biol, (1987), 195, p.957-961.
 *
 * Rows are indexed by the value returned from GetAminoIndex.
 * Unknown residues (X) and gaps are given every property.
 */

static int AminoProps[24] = {
    /* A ALA */ APHydrophobic | APSmall | APTiny,
    /* R ARG */ APPositive | APPolar | APCharged,
    /* N ASN */ APPolar | APSmall,
    /* D ASP */ APNegative | APPolar | APCharged | APSmall,
    /* C CYS */ APHydrophobic | APSmall,
    /* Q GLN */ APPolar,
    /* E GLU */ APNegative | APPolar | APCharged,
    /* G GLY */ APHydrophobic | APSmall | APTiny,
    /* H HIS */ APHydrophobic | APPositive | APPolar | APCharged | APAromatic,
    /* I ILE */ APHydrophobic | APAliphatic,
    /* L LEU */ APHydrophobic | APAliphatic,
    /* K LYS */ APHydrophobic | APPositive | APPolar | APCharged,
    /* M MET */ APHydrophobic,
    /* F PHE */ APHydrophobic | APAromatic,
    /* P PRO */ APSmall | APProline,
    /* S SER */ APPolar | APSmall | APTiny,
    /* T THR */ APHydrophobic | APPolar | APSmall,
    /* W TRP */ APHydrophobic | APPolar | APAromatic,
    /* Y TYR */ APHydrophobic | APPolar | APAromatic,
    /* V VAL */ APHydrophobic | APSmall | APAliphatic,
    /* B ASX */ APPolar,
    /* Z GLX */ APPolar,
    /* X UNK */ APAllProp,
    /* . GAP */ APAllProp
        };

/* Property-based variability of alignment column `pos`.
 *
 * Returns 0.0 when every row after the first carries exactly the same
 * character as the first row.  Otherwise P is the number of properties
 * (out of the ten AP... flags) that at least one row has and at least
 * one row lacks; rows that do not reach `pos` count as having every
 * property.  The result is 0.1*P + 0.1 for P below 9 and 1.0 otherwise.
 */
static double ZvelebilScore( pos )
    int pos;
{
    register SeqRecord *row;
    register int same,others;
    register int present,absent;
    register int differing;
    register int n,bit,first;
    register int mask;

    /* Test for complete conservation */
    same = 0;
    others = 0;
    first = SeqData->seq[pos];
    for( row=SeqData->next; row; row=row->next )
    {   if( (pos<row->len) && (first==row->seq[pos]) )
            same++;
        others++;
    }

    if( same == others )
        return( 0.0 );

    /* Collect which properties occur and which are missing somewhere */
    present = 0;
    absent = 0;
    for( row=SeqData; row; row=row->next )
    {   mask = (pos < row->len)? AminoProps[GetAminoIndex(row->seq[pos])]
                               : APAllProp;
        absent |= ~mask;
        present |= mask;
    }

    differing = 0;
    for( n=0,bit=1; n<10; n++,bit<<=1 )
        if( present & absent & bit )
            differing++;

    if( differing >= 9 )
        return( 1.0 );
    return( 0.1*differing + 0.1 );
}


/* Build the "Entropy" row and prepend it to the SeqFirst list.
 *
 * Each column holds a digit: floor(10*EntropyScore/4.32) capped at '9'.
 * The cap matters because the largest possible entropy, log2(20), is
 * slightly above 4.32 and would otherwise produce ':'.  Exits with a
 * message if memory runs out.
 */
static void CreateEntropySequence()
{
    register double entropy;
    register SeqRecord *ptr;
    register int ch,i;

    if( !(ptr=(SeqRecord*)malloc(sizeof(SeqRecord))) )
    {   fputs("Error: Unable to allocate memory!\n",stderr);
        exit(1);
    }

    strcpy(ptr->name,"Entropy");
    ptr->namelen = 7;
    ptr->weight = 1.0;
    ptr->len = alignlen;

    if( !(ptr->seq=(char*)malloc(alignlen)) )
    {   fputs("Error: Unable to allocate sequence memory!\n",stderr);
        exit(1);
    }

    for( i=0; i<alignlen; i++ )
    {   entropy = EntropyScore(i);
        ch = floor(entropy*10.0/4.32) + '0';
        if( ch > '9' ) ch = '9';
        ptr->seq[i] = ch;
    }
    ptr->next = SeqFirst;
    SeqFirst = ptr;
}


/* Build the "Pairwise" row and prepend it to the SeqFirst list.
 *
 * Each column holds a digit: floor(10*PairwiseScore) capped at '9', the
 * same scaling the Zvelebil row uses.  A score that is not a positive
 * number (zero, negative or NaN) is written as '0'.  Exits with a
 * message if memory runs out.
 */
static void CreatePairwiseSequence()
{
    register double score;
    register SeqRecord *ptr;
    register int ch,i;

    if( !(ptr=(SeqRecord*)malloc(sizeof(SeqRecord))) )
    {   fputs("Error: Unable to allocate memory!\n",stderr);
        exit(1);
    }

    strcpy(ptr->name,"Pairwise");
    ptr->namelen = 8;
    ptr->weight = 1.0;
    ptr->len = alignlen;

    if( !(ptr->seq=(char*)malloc(alignlen)) )
    {   fputs("Error: Unable to allocate sequence memory!\n",stderr);
        exit(1);
    }

    for( i=0; i<alignlen; i++ )
    {   score = PairwiseScore(i);
        if( score > 0.0 )
        {   /* score lies in 0..1, so scale by ten to get a digit */
            ch = floor(10.0*score) + '0';
            if( ch > '9' ) ch = '9';
        } else ch = '0';
        ptr->seq[i] = ch;
    }
    ptr->next = SeqFirst;
    SeqFirst = ptr;
}


/* Build the "Zvelebil" row and prepend it to the SeqFirst list.
 *
 * Each column holds a digit: floor(10*ZvelebilScore) capped at '9'.
 * Exits with a message if memory runs out.
 */
static void CreateZvelebilSequence()
{
    register SeqRecord *row;
    register double value;
    register int col,digit;

    row = (SeqRecord*)malloc(sizeof(SeqRecord));
    if( !row )
    {   fputs("Error: Unable to allocate memory!\n",stderr);
        exit(1);
    }

    strcpy(row->name,"Zvelebil");
    row->namelen = 8;
    row->weight = 1.0;
    row->len = alignlen;

    row->seq = (char*)malloc(alignlen);
    if( !row->seq )
    {   fputs("Error: Unable to allocate sequence memory!\n",stderr);
        exit(1);
    }

    col = 0;
    while( col < alignlen )
    {   value = ZvelebilScore(col);
        digit = floor(value*10.0) + '0';
        row->seq[col] = (digit > '9')? '9' : digit;
        col++;
    }

    /* Push onto the front of the output list */
    row->next = SeqFirst;
    SeqFirst = row;
}


/* Write the SeqData list as an MSF file to `outname`, or to standard
 * output when outname is "-".
 *
 * The output consists of the MSF header line, one " Name:" line per
 * row, the "//" separator and the alignment in blocks of 50 columns
 * (groups of ten) with right-justified row names.  Columns beyond the
 * characters actually stored for a row are written as '.'.  Checksums
 * are written as the placeholder "xxxx".  Exits with a message if the
 * output file cannot be created.
 */
static void WriteMSFFile()
{
    register SeqRecord *ptr;
    register FILE *outfile;
    register int i,j,max;
    register int namelen;
    register int cols;

    fputc('\n',stderr);

    if( strcmp(outname,"-") )
    {   if( !(outfile=fopen(outname,"w")) )
        {   /* report the file NAME; outfile is NULL here */
            fprintf(stderr,"Error: Unable to create file %s!\n",outname);
            exit(1);
        }
        fprintf(outfile," %s",outname);
    } else
    {   outfile = stdout;
        fputs(" noname.msf",outfile);
    }

    fprintf(outfile,"  MSF: %d  Type: P  Check: xxxx ..\n\n",alignlen);

    /* Width of the name column */
    namelen = 0;
    for( ptr=SeqData; ptr; ptr=ptr->next )
        if( ptr->namelen > namelen )
            namelen = ptr->namelen;

    for( ptr=SeqData; ptr; ptr=ptr->next )
    {   fputs(" Name: ",outfile);
        cols = fprintf(outfile,"%.16s",ptr->name);
        while( cols++ < namelen ) fputc(' ',outfile);
        fprintf(outfile,"  Len: %5d  Check: xxxx  Weight: %5.2f\n",
                ptr->len,ptr->weight);
    }
    fputs("\n//\n\n",outfile);

    for( i=0; i<alignlen; i+=50 )
    {   max = i+50;
        if( alignlen < max )
            max = alignlen;

        /* Ruler line: first column number, and the last one if the
         * block is wide enough to hold both.
         */
        cols = max-i;
        cols += (cols-1)/10;
        for( j=0; j<namelen+2; j++ )
            fputc(' ',outfile);
        cols -= fprintf(outfile,"%d",i+1);

        if( cols>5 )
        {   for( j=5; j<cols; j++ )
                fputc(' ',outfile);
            fprintf(outfile,"%5d\n",max);
        } else fputc('\n',outfile);

        for( ptr=SeqData; ptr; ptr=ptr->next )
        {   for( j=0; j<namelen-ptr->namelen; j++ )
                fputc(' ',outfile);
            for( j=0; j<ptr->namelen; j++ )
                fputc(ptr->name[j],outfile);
            fputc(' ',outfile);

            for( j=i; j<max; j++ )
            {   if( !(j%10) )
                    fputc(' ',outfile);
                /* rows shorter than the alignment are padded with gaps */
                fputc( (j<ptr->len)? ptr->seq[j] : '.', outfile );
            }
            fputc('\n',outfile);
        }
        fputc('\n',outfile);
    }

    if( outfile != stdout )
        fclose(outfile);
}


/* Print the command line synopsis on stderr and exit with status 1.
 * The synopsis lists every option ProcessCommandLine accepts.
 */
static void DisplayUsage()
{
    fputs("usage: distalign [-threshold <value>] [-entropy] [-pairwise]",
          stderr);
    fputs(" [-zvelebil] <pdbfile> <msffile> [<outfile>]\n",stderr);
    exit(1);
}

/* Interpret the command line.
 *
 * Options: -threshold <value> (must be a number greater than zero),
 * -entropy, -pairwise and -zvelebil (also accepted as -Zvelebil).
 * Positional arguments are, in order, the PDB file, the MSF file and an
 * optional output file; without the latter output goes to "-" (standard
 * output).  Defaults: Threshold 10.0, all flags off.  Any error prints
 * the usage text and exits.
 */
static void ProcessCommandLine( argc, argv )
    int argc; char *argv[];
{
    register int i,j;

    ZvelebilFlag = False;
    PairwiseFlag = False;
    EntropyFlag = False;

    Threshold = 10.0;
    outname = "-";

    j = 0;
    for( i=1; i<argc; i++ )
        if( argv[i][0] == '-' )
        {   if( !strcmp(argv[i],"-threshold") )
            {   if( i == argc-1 )
                    DisplayUsage();
                /* GetDistanceChar divides by Threshold, so it has to
                 * be a positive number.
                 */
                if( (sscanf(argv[++i],"%lg",&Threshold) != 1) ||
                    !(Threshold > 0.0) )
                {   fputs("Error: Invalid threshold value!\n",stderr);
                    DisplayUsage();
                }
            } else if( !strcmp(argv[i],"-entropy") )
            {   EntropyFlag = True;
            } else if( !strcmp(argv[i],"-zvelebil") ||
                       !strcmp(argv[i],"-Zvelebil") )
            {   ZvelebilFlag = True;
            } else if( !strcmp(argv[i],"-pairwise") )
            {   PairwiseFlag = True;
            } else DisplayUsage();
        } else switch( j++ )
        {   case(0): pdbname = argv[i];  break;
            case(1): msfname = argv[i];  break;
            case(2): outname = argv[i];  break;
            default: DisplayUsage();
        }

    if( j<2 )
        DisplayUsage();
}


/* Program entry point: read the alignment and the PDB chain, locate the
 * chain in the alignment, add the Distance row plus any requested score
 * rows in front of the original rows, and write the result.
 */
int main( argc, argv )
    int argc; char *argv[];
{
    register int matches;

    /* Banner */
    fputs("PDB Distance to MSF Alignment\n",stderr);
    fputs("R.Sayle & M.Saqi, January 1996\n",stderr);
    fputs("Version 1.2\n\n",stderr);

    ProcessCommandLine(argc,argv);
    ParseMSFFile();
    ParsePDBFile();

    matches = FindPDBSequence();
    if( matches == 0 )
    {   fputs("Error: No matching sequence found!\n",stderr);
        exit(1);
    }

    fprintf(stderr,"Matching MSF sequence .... %.10s\n",QueryPtr->name);
    fprintf(stderr,"Position in alignment .... %d\n",QueryPos+1);
    if( matches > 1 )
    {   fputs("Warning: More than one matching sequence!\n",stderr);
    }

    /* Generated rows are pushed in front of the original alignment */
    SeqFirst = SeqData;
    fprintf(stderr,"Threshold Distance ....... %lg\n",Threshold);
    CreateDistanceSequence();

    if( EntropyFlag )
    {   CreateEntropySequence();
    }
    if( PairwiseFlag )
    {   CreatePairwiseSequence();
    }
    if( ZvelebilFlag )
    {   CreateZvelebilSequence();
    }

    SeqData = SeqFirst;
    WriteMSFFile();
    exit(0);
}

/*
 * Dr. Mansoor Saqi                    Email mass15599@ggr.co.uk
 * Bioinformatics Group                Phone +44 (0)81 966 2417
 * Dept. of Biomolecular Structure
 * Glaxo Group Research
 * Greenford, Middlx, UK.
 */

