/* finddupe.c */
/* small program to find duplicate files */
/* written by George Shearer */

#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <fcntl.h>
#include <unistd.h>

/* One node per candidate file, kept in a singly linked list. */
struct filelist {
   char filename[150];      /* pathname as copied in by loadfile() */
   off_t size;              /* st_size at scan time */
   ino_t inode;             /* st_ino, compared in main() to spot hard links */
   nlink_t nlinks;          /* st_nlink at scan time */
   char ref;                /* non-zero once main() has flagged the file to be skipped */
   struct filelist *next;   /* following node, or a null pointer at the tail */
};

/* List cursors shared by the whole program. */
struct filelist *fl=NULL;      /* tail while loading, iterator afterwards */
struct filelist *temp;         /* scratch pointer */
struct filelist *first=NULL;   /* head of the list */

int count=0;                 /* non-directory entries visited by dopath() */
int verbose;                 /* verbosity level, one per 'v' counted in main() */
unsigned long int wasted;    /* wasted disk space */
dev_t filesys;               /* used for filesystem skipping */
char *fullpath=NULL;         /* path buffer allocated in main(), edited in place by dopath() */

/*
 * Compare the first x bytes of two buffers.
 *
 *   s1, s2  buffers holding at least x readable bytes each
 *   x       number of bytes to compare
 *
 * Returns 0 when the ranges are byte-for-byte equal and 1 when they differ.
 * A count of zero (or less) compares nothing and reports "no difference",
 * so the buffers are never touched outside [0, x).
 */
int diffs(unsigned char *s1,unsigned char *s2,int x) {
   if(x<=0)
     return(0);
   return(memcmp(s1,s2,(size_t)x)!=0);
}

/*
 * Read up to len bytes from fd into buf, calling read() again after a
 * short read.  Returns the number of bytes stored (less than len only
 * when end of file was reached) or -1 if read() failed.
 */
static ssize_t diff_read_full(int fd,unsigned char *buf,size_t len) {
   size_t got=0;

   while(got<len) {
     ssize_t n=read(fd,buf+got,len-got);

     if(n==-1)
       return(-1);
     if(n==0) /* end of file */
       break;
     got+=(size_t)n;
   }
   return((ssize_t)got);
}

/*
 * Compare the contents of two files.
 *
 *   file1, file2  pathnames of the files to compare
 *
 * Returns 0 only when both files could be read completely and hold the
 * same bytes.  Returns 1 when the contents or lengths differ, or when
 * either file cannot be opened or read (a message naming the file goes
 * to stderr), so an I/O failure is never reported as "identical".
 */
int diff(char *file1,char *file2) {
   int fd1=-1,fd2=-1,ret=1;
   unsigned char buf1[1024],buf2[1024];

   if(((fd1=open(file1,O_RDONLY))==-1)||((fd2=open(file2,O_RDONLY))==-1))
     fprintf(stderr,"Unable to compare %s & %s.\n",file1,file2);
   else
     for(;;) {
       ssize_t sr1,sr2;

       if((sr1=diff_read_full(fd1,buf1,sizeof buf1))==-1) {
         fprintf(stderr,"Read error: %s\n",file1);
         break;
       }
       if((sr2=diff_read_full(fd2,buf2,sizeof buf2))==-1) {
         fprintf(stderr,"Read error: %s\n",file2);
         break;
       }
       /* unequal chunk lengths mean one file ended before the other */
       if(sr1!=sr2 || memcmp(buf1,buf2,(size_t)sr1)!=0)
         break;
       if(sr1==0) { /* both files ended with no mismatch */
         ret=0;
         break;
       }
     }
   if(fd1!=-1)
     close(fd1);
   if(fd2!=-1)
     close(fd2);
   return(ret);
}

void freeall(void) {
   fl=first;
   while(fl) {
     temp=fl->next;
     free(fl);
     fl=temp;
   }
   if(fullpath)
     free(fullpath);
}

/*
 * Report a fatal condition and terminate the program.
 *
 *   fmt, ...  printf-style format string and its arguments
 *
 * The message is written to stderr, the same stream diff() uses for its
 * errors, so it cannot mix into the listing printed on stdout.  All
 * memory is released through freeall() and the process exits with
 * EXIT_FAILURE.  This function does not return.
 */
void problemo(const char *fmt, ...) {
   va_list ap;

   va_start(ap,fmt);
   vfprintf(stderr,fmt,ap);
   va_end(ap);

   freeall();
   exit(EXIT_FAILURE);
}

/* malloc()'s another struct, loads the file info & increments */
void loadfile(char *pathname, const struct stat *fstats) {
   if(S_ISREG(fstats->st_mode) && ((!fstats->st_size && verbose>4) || fstats->st_size)) { /* only read regular files */
     if(!(temp=malloc(sizeof(struct filelist))))
       problemo("Out of memory");
     first=(!fl) ? temp : first;
     fl=(!fl) ? temp : fl;
     fl->next=(!fl) ? (struct filelist *)0 : temp;
     fl=temp;
     fl->next=(struct filelist *)0;

     strcpy(fl->filename,pathname);
     fl->size=fstats->st_size;
     fl->inode=fstats->st_ino;
     fl->nlinks=fstats->st_nlink;
     fl->ref=0;
   }
}

/*
 * Scan the path currently held in the global fullpath buffer.
 *
 * A non-directory is handed to loadfile() and counted.  A directory is
 * walked recursively: each entry name is appended to fullpath in place
 * and dopath() is called again.  Directories on a device other than the
 * one seen first (recorded in filesys) are not entered.  lstat() is
 * used, so symbolic links are examined themselves rather than followed.
 *
 * Paths that would not fit in fullpath are skipped with a warning on
 * stderr.  The capacity used here must match the PATH_MAX+NAME_MAX
 * bytes that main() allocates for fullpath.
 *
 * Returns 0 when the path cannot be lstat()ed or lies on another
 * filesystem, 1 otherwise.
 */
int dopath(void) {
   const size_t cap=(size_t)PATH_MAX+NAME_MAX;
   struct stat fstats;
   struct dirent *dirp;
   DIR *dp;
   char *ptr;
   size_t len;

   if(lstat(fullpath,&fstats)<0)
     return(0);

   if(!S_ISDIR(fstats.st_mode)) {
     loadfile(fullpath,&fstats);
     count++;
     return(1);
   }

   if(!filesys)
     filesys=fstats.st_dev;
   else if(filesys!=fstats.st_dev) /* stay in file system */
     return(0);

   len=strlen(fullpath);
   if(len+2>cap) { /* no room for a '/' and the terminator */
     fprintf(stderr,"Skipping %s: pathname too long.\n",fullpath);
     return(1);
   }

   /* make the path end in exactly one '/', with ptr just past it */
   ptr=fullpath+len;
   if(ptr[-1]!='/')
     *ptr++='/';
   *ptr=0;

   if((dp=opendir(fullpath))!=NULL) {
     while((dirp=readdir(dp))!=NULL) {
       if(!strcmp(dirp->d_name,".") || !strcmp(dirp->d_name,".."))
         continue;
       if((size_t)(ptr-fullpath)+strlen(dirp->d_name)+1>cap) {
         *ptr=0; /* drop the previous entry name before printing */
         fprintf(stderr,"Skipping %s%s: pathname too long.\n",fullpath,dirp->d_name);
         continue;
       }
       strcpy(ptr,dirp->d_name);
       dopath();
     }
     ptr[-1]=0; /* cut the path back at the separator */
     if(closedir(dp)<0)
       problemo("Can't close directory: %s\n",fullpath);
   }
   return(1);
}

/*
 * Program entry point.
 *
 * Usage: finddupe [-vvvvv] path [path ...]
 *
 * 1. Parse the optional verbosity flag.  It is recognised only when the
 *    first argument starts with '-', so a path such as "/var" is never
 *    mistaken for a flag.
 * 2. Scan every path argument with dopath(), building the file list.
 * 3. Compare each pair of files of equal size with diff() and report.
 *
 * Verbosity as used below: any level except 3 prints scan progress and
 * the summary; levels above 1 list identical pairs; levels above 3 list
 * every compared pair; level 3 prints bare "source target" lines for
 * identical pairs and nothing else.
 *
 * Returns 0 on normal completion.
 */
int main(int argc,char *argv[]) {
   const size_t pathcap=(size_t)PATH_MAX+NAME_MAX;
   int x=0,i,firstpath=1;

   if((argc<2) || (argv[1][0]=='-' && tolower((unsigned char)argv[1][1])!='v'))
     problemo("FindDupe v1.3\nWritten by George Shearer.\n\nUsage: finddupe [-vvvvv] path [path ...]\nWhere: path is the location of tree to search for duplicate files.\n");

   if(argv[1][0]=='-') {
     /* count the v's, case-insensitively like the usage check above */
     while(verbose<6 && tolower((unsigned char)argv[1][verbose+1])=='v')
       verbose++;
     firstpath=2;
   }

   if(!(fullpath=malloc(pathcap)))
     problemo("Out of memory");

   for(i=firstpath;i<argc;i++) {
     int scanned=0;

     if(verbose && verbose!=3) {
       printf("Scanning %-20s...",argv[i]);
       fflush(stdout);
     }
     x=count;
     filesys=(dev_t)0;
     /* an argument too long for the buffer is treated as unreadable */
     if(strlen(argv[i])<pathcap) {
       strcpy(fullpath,argv[i]);
       scanned=dopath();
     }
     if(!scanned) {
       if(verbose!=3)
         puts("Unable to lstat dir! not there?");
     }
     else if(verbose && verbose!=3)
       printf("loaded %d file%s.\n",count-x,(count-x!=1) ? "s" : "");
   }

   x=i=0; /* x: identical pairs found, i: lines reported so far */

   for(fl=first;fl;fl=fl->next) {
     for(temp=fl->next;temp;temp=temp->next) {
       int difft;

       if(temp->size!=fl->size)
         continue;

       /* NOTE(review): a flagged file is skipped in every later
          comparison and also flags the file it is paired with;
          confirm this propagation is intended. */
       if(fl->inode==temp->inode || fl->ref || temp->ref) { /* exclude hard links */
         fl->ref=1; /* plain flag: a char counter could wrap back to 0 */
         temp->ref=1;
         continue;
       }

       difft=!diff(fl->filename,temp->filename); /* 1 when identical */
       x+=difft;
       if(difft)
         wasted+=(unsigned long)fl->size;

       if(verbose>3 || (verbose>1 && difft)) {
         if(!i++ && verbose!=3)
           printf("\nSource pathname             Target pathname               Status    Byte size\n-----------------------------------------------------------------------------\n");
         if(verbose==3) {
           if(difft)
             printf("%s %s\n",fl->filename,temp->filename);
         }
         else
           printf("%-27s %-27s (%s) (%lld)\n",fl->filename,temp->filename,
                  (difft) ? "identical" : "different",(long long)fl->size);
       }
     }
   }

   freeall();

   if(verbose!=3) {
     printf("%s%d possible duplicate%s out of %d file%s.\n",(verbose>1) ? "\n" : "",x,(x!=1) ? "s" : "",count,(count!=1) ? "s" : "");
     if(x)
       printf("%lu bytes in use by duplicate files.\n",wasted);
   }
   return(0);
}
