#!/usr/bin/perl 
# Dis-assembles binary data and comments on it... Needs objdump installed
# This was created to analyze forensic binary executables but may also
# be helpfull when reverse engineering in general.
# Created 5/11/02 by Craig Smith (craig@AcademicUnderground.org)
# More info may be found here: www.AcademicUnderground.org/examiner
# (c) 2002 under the terms of the GPL
use strict;
use Env;
use Getopt::Std;
use File::Basename;
use vars qw($work_dir $COMMENT %function %header %constants $VERSION $verbose %interrupts %registers @rodata $data_section %options %stack_state);
# TCT VARS (Read README.TCT)
use vars qw($TCT_HOME $OBJDUMP $UPX $DRESS $UNCRIPPLE);
# Optionally loaded by the examiner library
use vars qw($loaded_library %syscalls %socketcall);

my $VERSION="0.5";			# Version number
my $COMMENT="#";			# Symbol for comment lines
my $OBJDUMP="/usr/bin/objdump";		# path to objdump command
my $UPX="/usr/bin/upx";			# Optional (for compressed files)
my $DRESS="/usr/bin/dress";		# Optional (for stripped files)
my $FILE="/usr/bin/file";		# path to file command
my $UNISTD="/usr/src/linux/include/asm/unistd.h";
my $NET_H="/usr/include/linux/net.h";	# used for socketcall() info
my $TCT_CONF="/etc/tct/coroner.conf";	# Optional TCT installation
my $EX_LIB="examiner_hashes.pl";	# Optional library
my $EX_SHARE="/usr/share/examiner"; 	# Path to above library

my $work_dir="$HOME/examiner-data";	# Working dir path
my $verbose=0;				# Verbosity level
my $target="";				# Target binary
my $filename;				# Basename of target
my $output_file;			# Commented filename
my $file_type="";			# target binary type
my $OS="";				# OS of executable
my @headers;				# binary header sections
my %function;				# Hash of function_addr/names
my %header;				# Header addresses and sizes
my %constants;				# Directly reference memory
my %interrupts;				# All interrupt calls
my %registers;				# Maintains state of registers
my @rodata;				# Contents of .rodata section
my $total_functions;			# Total number of functions
my $total_id_funct;			# Total identified funtions
my $total_lines;			# Totol lines of objdump srcdump
my $summary;				# Display summary results?
my $dump_headers;			# Dump header files?
my $create_references;			# Create reference files?
my $quite;				# Supress output
my %options;				# Option Hash for getopts
my $ONLY_KNOWN_FUNCT=1;			# Comment on generic functions?

getopts("svVqCHRLo:c:d:hx:", \%options) || usage();

if ($options{"v"}) { $verbose++; }
if ($options{"s"}) { $summary = 1; }
if ($options{"H"}) { $dump_headers = 1; }
if ($options{"R"}) { $create_references = 1; }
if ($options{"C"}) { $TCT_CONF = $options{"C"}; }
if ($options{"c"}) { $COMMENT = $options{"c"}; }
if ($options{"q"}) { $quite = 1; }
if ($options{"h"}) { usage(); }
if ($options{"d"}) { $work_dir = $options{"d"}; }
if ($options{"o"}) { $output_file = $options{"o"}; }
if ($options{"x"}) { $target = $options{"x"}; }
if ($options{"V"}) { print "$0 Version v$VERSION\n"; exit(0); }

if (!$target) { print "\nMust specify file to dissassemble!\n"; usage(); }
$filename=basename($target);

# Check for TCT installtaions
if(-r $TCT_CONF) {
	require "$TCT_CONF";
	$work_dir = "$TCT_HOME/examiner" if !$options{"d"};
	print "Found and loaded TCT configurations\n" if $verbose;
}

### BASIC SANITY CHECKS ###
if (! -x $OBJDUMP) {
	print "Couldn't find objdump command.  Expected it here: $OBJDUMP\n";
	print "If you have it installed in a different path plese modify the\n";
	print "\$OBJDUMP variable in this code.\n";
	exit (1);
}
if (! -x $FILE) {
	print "Couldn't find file command.  Expected it here: $FILE\n";
	print "If you have it installed in a different path plese modify the\n";
	print "\$FILE variable in this code.\n";
	exit (1);
}
if (! -r $target) {
	print "Could not read binary file $target : $!";
	exit (1);
}
$|=1;
$file_type = `$FILE $target`;

### Load optional hashes and header files ###
# Check for optional hashes for syscalls
require "./$EX_LIB" if -r "./$EX_LIB";
require "/usr/share/tct/$EX_LIB" if -r "/usr/share/tct/$EX_LIB" && !$loaded_library;
require "$EX_SHARE/os/bsd/$EX_LIB" if -r "$EX_SHARE/os/bsd/$EX_LIB" && $file_type=~/NetBSD/ && !$loaded_library;
require "$EX_SHARE/os/linux/$EX_LIB" if -r "$EX_SHARE/os/linux/$EX_LIB" && !$loaded_library;
print "Loaded examiner_hashes library\n" if $verbose && $loaded_library;

# Check for header files if no syscalls loaded
if(! -r $UNISTD && !keys(%syscalls)) {
	$UNISTD="/usr/include/asm/unistd.h" if -r "/usr/include/asm/unistd.h";
	$UNISTD="/usr/include/sys/syscall.h" if -r "/usr/include/sys/syscall.h";
	if(! -r $UNISTD) {
		print "Couldn't read header file $UNISTD\n";
		print "I need to know the the system call numbers in order to comment\n";
		print "the interrupt calls.  You could either locate the proper header\n";
		print "file or use an examiner_hashes.pl file.\n";
		exit(1);
	}
}


### PHASE 1 ###  Dump data
print "PHASE 1 - Dumping data from $target\n" if !$quite;
if(! -d $work_dir) {
	if(!mkdir $work_dir) {
		print "Couldn't create work directory ($work_dir) : $!\n";
		exit (1);
	}
}
if (!($OS=init_type($file_type))) {
	print "Binary file is not a known executable type.\n$file_type\n";
	exit (1);
}
print "Target binary is $OS executable.\n" if $verbose;

# Check for encoders
$target=check_for_upx($target);
# Check for crippled elf headers
check_for_crippled($target);

# Re-check file types in case something has changed
$file_type = `$FILE $target`;
$OS=init_type($file_type);

if(!(@headers=parse_headers($OBJDUMP, $target))) {
	print "Couldn't parse section headers\n";
	exit (1);
}
if($OS=~/dynamic/) {
	dump_dyn_symbol_table($OBJDUMP, $target);
} elsif($OS=~/stripped/) {
	if(-x $DRESS) {
		$target=dress_binary($DRESS, $target);
	} elsif ($verbose) {
		print "NOTE:\n";
		print "This binary is statically linked and stripped.";
		print "You can get better results\n";
		print "on function name resolution if you also have";
		print "the fenris utility dress(1)\n";
		print "installed.  The location of dress can be specified in";
		print "coroner.conf if examiner can't";
		print "find it\n";
	}
}
if(dump_binary($OBJDUMP, $target, @headers) != 0) {
	print "Errors where encontered when dumping binary data.\n";
	exit(1);
}
$filename=basename($target);

### PHASE 2 ###  First pass over data
print "PHASE 2 - Initial pass of dumped data\n" if !$quite;
$total_lines=fact_finder("$work_dir/$filename.dump");
load_rodata($OBJDUMP, "$target");
load_data($OBJDUMP, "$target");

### PHASE 3 ### Analyze Collected Data
print "PHASE 3 - Analyze collected data\n" if !$quite;
$total_functions=init_function_names();
$total_id_funct=match_int_funct($UNISTD);
detail_dup_funct($NET_H);

### PHASE 4 ### Write commented output
print "PHASE 4 - Generate commented dissassembled source (takes a while)...\n" if !$quite;
create_section_boundry("$work_dir/$filename.dump.sections") if $create_references;
update_function_list("$work_dir/$filename.dump.functions") if $create_references;
create_int_references("$work_dir/$filename.dump.interrupts") if $create_references;
comment_lines("$work_dir/$filename.dump", $output_file);

### END ###
if($summary) {
  ($total_id_funct, $total_functions)=count_functions();
  print "\n   ___..oooOOO[ Summary ]OOOooo..___\n";
  print "   $total_lines lines of code were processed.\n";
  print "   $total_functions functions were located.\n";
  print "   Of those, $total_id_funct were successfully identified.\n";
  printf ("   Function Ratio: %d\%\n", int($total_id_funct/$total_functions*100));
}
$output_file = "$work_dir/$filename.dump.commented" if !$output_file;
print "Commented code can be found here: $output_file\n" if !$quite && $output_file ne "-";

exit(0);
##### Subs ###

# Checks for crippled ELF headers
sub check_for_crippled {
  my $target = shift;
  my $shoff;
  my $shnum;
  my $shentsize;
  my $err;
  open TARGET, $target;
  seek(TARGET,32,0);
  read(TARGET,$shoff,2);
  seek(TARGET,46,0);
  read(TARGET,$shnum,2);
  read(TARGET,$shentsize,2);
  close TARGET;
  $shoff=unpack("h4", $shoff);
  $shnum=unpack("c2", $shnum);
  $shentsize=unpack("c2", $shentsize);
  if($shoff == 0 && $shnum == 0 && $shentsize == 0) {
	if (-x $UNCRIPPLE) {
		print "ELF was crippled but attempting to uncripple now.\n";
		$err=system("$UNCRIPPLE $target >/dev/null");
		if($err > 0) {
			print "Error occured when uncrippling!\n";
			return;
		}
	} else {
		print "It appears the $target is a crippled ELF file.  You may\n";
		print "want to uncripple this file.  A utility is provided with\n";
		print "this suite that may help (uncripple.c).  If you want Examiner\n";
		print "to uncripple automatically then you need to set a variable in\n";
		print "coroner.conf (eg. \$UNCRIPPLE=\"/usr/sbin/uncripple\")\n";
	}
  }
}

# Checks for UPX compressed binaries
sub check_for_upx {
  my $target = shift;
  my $rptfile = basename($target);
  my $magic;
  my $ans;
  my $err;
  my $buf;
  my $upx_version;
  my $target_upx_version;
  my $loader_size;
  my ($version,$format,$method,$level,$u_adler,$c_adler);  # UPX variables
  my ($u_len,$c_len,$u_file_size);
  my %upx_format = (
  	1 => "DOS COM",
	2 => "DOS SYS",
	3 => "DOS EXE",
	4 => "DJGPP2 COFF",
	5 => "WC LE",
	6 => "VXD LE",
	7 => "DOS EXEH",
	8 => "TNT ADAM",
	9 => "WINT32 PE",
	10 => "LINUX i386",
	11 => "WIN16 NE",
	12 => "LINUX ELF i386",
	13 => "LINUX SEP i386",
	14 => "LINUX SH i386",
	15 => "VMLINUZ i386",
	16 => "BVMLINUZ i386",
	17 => "ELKS 8086",
	129 => "ATARI TOS",
	130 => "SOLARIS SPARC",
  );
  my %upx_method = (
  	2 => "NRV2B_LE32",
	3 => "NRV2B_8",
	4 => "NRV2B_LE16",
	5 => "NRV2D_LE32",
	6 => "NRV2D_8",
	7 => "NRV2D_LE16",
	8 => "NRV2E_LE32",
	9 => "NRV2E_8",
	10 => "NRV2E_LE16",
  );
  open TARGET, "$target";
  seek(TARGET, 121, 0);
  read(TARGET, $magic, 3); # Technically we skip the 7f part of the number
  read(TARGET, $loader_size, 2); # Loader Size?
  if($magic=~  /^UPX/) {
  	open UPXINFO, ">$work_dir/$rptfile.UPXinfo";
  	read(TARGET, $version, 1);
	read(TARGET, $format, 1);
	# Scan the next 200 bytes for an ASCII version number
	read(TARGET, $buf, 200);
	if($buf=~/Id: UPX (\d*)\.(\d*) Copyright/) {
		$target_upx_version="$1.$2";
	}
	$version=unpack("c2", $version);
	$format=unpack("c2", $format);
	print UPXINFO "UPX information for $target\n";
	print UPXINFO "====================================\n";
	print UPXINFO "Compressed with UPX v$target_upx_version\n" if $target_upx_version;
	print UPXINFO "Version: $version\n";
	$format=$upx_format{$format} if $upx_format{$format};
	print UPXINFO "Format: $format\n";
	print UPXINFO "--- [ Info from the tailend ] ---\n";
	($version,$format,$method,$level)=get_upx_tail(\*TARGET);	
	print UPXINFO "Version: $version (Should be same as above)\n";
	$format=$upx_format{$format} if $upx_format{$format};
	print UPXINFO "Format: $format (Should be same as above)\n";
	$method=$upx_method{$method} if $upx_method{$method};
	print UPXINFO "Method Used: $method\n";
	print UPXINFO "Level of Compresion: $level\n";
	print UPXINFO "\n";
	close UPXINFO;
	if(-x $UPX) {
		# Clear away any other decoded files because they'll cause upx to fail
		unlink("$work_dir/$rptfile.decoded") if (-r "$work_dir/$rptfile.decoded");
		print "UPX encoded executable!  Decoding...\n";
		$err=system("$UPX -q --no-progress -d $target -o $work_dir/$rptfile.decoded &>/dev/null");
		if($err || ! -r "$work_dir/$rptfile.decoded") {
			print "Couldn't decompress file($err).  Exiting...\n";
			$upx_version=`$UPX -V`;
			if($upx_version=~/^upx (\d*)\.(\d*)/) {
				$upx_version="$1.$2";
			}
			if(!$target_upx_version) {
				print "I was unable to determine what the upx version infomation was for the\n";
				print "Target binary.  This file is probably not a true UPX file.\n";
			}
			elsif($version == 255) {
				print "This binary uses an undocumented \"feature\" that disables decompression\n";
				print "By setting the version to 0xff.  You have two options from here:\n";
				print "Either modify packer.c in the upx source code or modify the header (The former is easier ;-)\n";
			}
			elsif($upx_version ne $target_upx_version) {
				print "This could simply be a version mismatch.  You have UPX v$upx_version\n";
				print "And the target was compressed with UPX v$target_upx_version.\n";
				print "Try downloading that exact version to decompress it.\n";
			} else {
				print "Both your UPX version and the target's reported versions are the same.\n";
				print "This file is likely tampered with or corrupted.\n";
			}
			print "More information can be found in $work_dir/$rptfile.UPXinfo\n";
			exit (1);
		}
		$target="$work_dir/$rptfile.decoded";
		print "New target is now $target\n" if $verbose;
	} else {
		print "This is a UPX compressed executable.  I will not\n";
		print "be able to analyze this file in this state and you do not\n";
		print "seem to have 'upx' installed.  To get it goto\n";
		print "http://upx.sourceforge.net\n";
		exit (1);
	}
  }
  close TARGET;
  return $target;
}

# Locates and returns UPX tail header
sub get_upx_tail {
  my $UPXFILE= shift;
  my $count;
  my $buf;
  my $found=0;
  my $version;
  my $format;
  my $method;
  my $level;
  my $u_len;
  my $c_len;
  my $u_adler;
  my $c_adler;
  my $filter;
  my $filter_cto;
  my $u_file_size;
   #We'll only check the end 200+ bytes
   # for sanity reasons
  for($count=-200;$count < 0 && !$found; $count++) {
  	seek($UPXFILE,$count,2);
	read($UPXFILE, $buf, 4);
	if($buf eq "UPX!") {
		$found=$count;
	}
  }
  if($found) {
	read($UPXFILE,$version,1);
	read($UPXFILE,$format,1);
	read($UPXFILE,$method,1);
	read($UPXFILE,$level,1);
	$version=unpack("c2", $version);
	$format=unpack("c2", $format);
	$method=unpack("c2", $method);
	$level=unpack("c2", $level);
	if($format < 128) {
		read($UPXFILE, $u_adler, 4);
		read($UPXFILE, $c_adler, 4);
		read($UPXFILE, $u_len, 4);
		read($UPXFILE, $c_len, 4);
		read($UPXFILE, $u_file_size, 4);
		read($UPXFILE, $filter, 1);
		read($UPXFILE, $filter_cto, 1);
	} else {
		read($UPXFILE, $u_len, 4);
		read($UPXFILE, $c_len, 4);
		read($UPXFILE, $u_adler, 4);
		read($UPXFILE, $c_adler, 4);
		read($UPXFILE, $u_file_size, 4);
		read($UPXFILE, $filter, 1);
		read($UPXFILE, $filter_cto, 1);
	}
  }
  return($version,$format,$method,$level);
}

# Adds comments to source dump on .rodata sections
sub comment_rodata {
  my $line = shift;
  my $addr;
  my $output;
  my $register;
	if($line=~/(\w*):(.*)push(\s*)\$0x(\w{7})/) {
		$addr=$4;
		if($constants{$addr} eq "rodata") {
			$output.="$COMMENT PUSH \"".return_rostring($addr,52)."\" on the stack\n";
		} 
	} elsif($line=~/(\w*):(.*)pushl(\s*)0x(\w{7})$/) {
		if($addr=data_ref_pointer($4)) {
			$output.="$COMMENT PUSH \"".return_rostring($addr,52)."\" on the stack\n";
		}
	} elsif($line=~/(\w*):(.*)mov(\s*)\$0x(\w{7}),\%(\w*)/) {
		$addr=$4;
		$register=uc($5);
		if($constants{$addr} eq "rodata") {
			$output.="$COMMENT MOVE \"".return_rostring($addr,52)."\" into $register\n";
		}
	} elsif($line=~/(\w*):(.*)mov(\s*)0x(\w{7}),\%(\w*)/) {
		$addr=$4;
		$register=uc($5);
		if($constants{$addr} eq "rodata") {
			$output.="$COMMENT MOVE \"".substr(return_rostring($addr,52),0,4)."\" into $register\n";
		}
	}
	$output.=$line;
  return $output;
}

# load rodata into memory as a big-ass array
sub load_rodata {
  my $OBJDUMP = shift;
  my $target = shift;
  my $filename=basename($target);
  my $line;
  my $col1;
  my $col2;
  my $col3;
  my $col4;
  my $hexdata;
  my $raw;
  return if !$header{"rodata"};
  print "Loading rodata into memory..." if $verbose;
  if (! -r "$work_dir/$filename.rodata") {
  	open RODATA, "$OBJDUMP -j .rodata -s $target 2>/dev/null|"
  } else {
  	open RODATA, "$work_dir/$filename.rodata";
  }
  while(<RODATA>) {
	$line = $_;
	if(substr($line,1,7)=~/8(\w{6})/) {
		$col1 = substr($line,9,8);
		$col2 = substr($line,18,8);
		$col3 = substr($line,27,8);
		$col4 = substr($line,36,8);
		$hexdata.=$col1.$col2.$col3.$col4;
	}
  }
  for (my $counter=0;$counter < length($hexdata); $counter+=2) {
	$raw=pack("H2", substr($hexdata,$counter,2));
	push @rodata, $raw;
  }
  close RODATA;
  print "done.\n" if $verbose;
}

# Loads array for .data
sub load_data {
  my $OBJDUMP=shift;
  my $target =shift;
  my $filename=basename($target);
  my $line;
  my $chunk;
  return if !$header{"data"};
  print "Loading .data into memory..." if $verbose;
  if (! -r "$work_dir/$filename.data") {
	open DATA, "$OBJDUMP -j .data -s $target 2>/dev/null|"
  } else {
        open DATA, "$work_dir/$filename.data";
  }
  while(<DATA>) {
	my $line=$_;
	$chunk=substr($line, 9, 35);
	if($chunk=~/(\w{8})/) {
		$chunk=~s/ //g;
		$data_section.=$chunk;
	}
  }
  close DATA;
  print "done\n" if $verbose;
}

# Check for constant variables in the code
sub check_constants {
  my $line = shift;
  my $constant;
  my $name;
  my $addr;
  my $counter=1;
	if( ($line=~/(\w*):(.*)push(\s*)\$0x(\w{7})/) ||
	    ($line=~/(\w*):(.*)pushl(\s*)0x(\w{7})$/) ||
	    ($line=~/(\w*):(.*)mov(\s*)\$0x(\w{7}),/) ||
	    ($line=~/(\w*):(.*)mov(\s*)0x(\w{7}),/)) {
		$addr=$1;
		$constant=$4;
		if(!($name=locate_section($constant))) {
			$counter++;
			$name="UNKNOWN_SECT$counter at 0x$addr";
		}
		$constants{$constant}=$name;
	}
}

# Attempts to resolve duplicate interrupt function names such as socketcall
sub detail_dup_funct {
   my $NET_H = shift;
   my $line;
   my $funct_name;
   my $int_addr;
   my $syscall_num;
   print "Attempting to detail duplicate function names..." if $verbose;
   if (! -r $NET_H && !keys(%socketcall)) {
   	print "failed. (No net.h)\n" if $verbose;
	return;
   }
   if(!keys(%socketcall)) {
   	open NETH, "$NET_H";
   	while(<NETH>) {
		$line=$_;
		if($line=~/\#define SYS_(\S*)(\s*)(\d*)/) {
			$socketcall{$3}=$1;
		}
   	}
   	close NETH;
   }

   foreach $int_addr (keys %interrupts) {
	# 102 == SOCKETCALL
	if(hex($interrupts{$int_addr}{"AX"}) == 102) {
		$syscall_num=hex $interrupts{$int_addr}{"DX"};
		if($socketcall{$syscall_num} eq "SOCKET") {
			$function{$interrupts{$int_addr}{"FUNCT"}}="SOCKET_FUNCT";
		} else {
			$function{$interrupts{$int_addr}{"FUNCT"}}="SOCKET_". $socketcall{$syscall_num} . "_FUNCT";
		}
	}	
   }
   print "done.\n" if $verbose;
   return;
}

# Counts total functions and known functions
sub count_functions {
   my $funct_addr;
   my $known;
   my $total;
   foreach $funct_addr (keys %function) {
	$total++;
	$known++ if $function{$funct_addr}!~/FUNCTION(\d*)/;
   }
   return ($known, $total);
}

# Lookups INT meanings and renames necessary functions
sub match_int_funct {
   my $UNISTD = shift;
   my $line;
   my $uniq;
   my $funct_name;
   my $funct_addr;
   my $int_addr;
   my $last_addr;
   my @all_functs;
   my $syscall_num;
   my %funct_hash;
   my $cnt=0;
   my $identified;
   my $match="__NR_";		# Used if parsing header files
   print "Analyzing interrupts and renaming valid functions..." if $verbose;
   if(!keys(%syscalls)) {
   	open SYSCALLS, "$UNISTD";
	$match="SYS_" if basename($UNISTD) eq "syscall.h";
   	while(<SYSCALLS>) {
		$line = $_;
		if($line=~/\#define $match(\S*)(\s*)(\d*)/) {
			$syscalls{$3}=$1;
		}
   	}
   	close SYSCALLS;
   }
   foreach $int_addr (keys %interrupts) {
		$int_addr;
		next if $interrupts{$int_addr}{"INT"} ne "80";
		$syscall_num = hex $interrupts{$int_addr}{"AX"};
		next if !$syscalls{$syscall_num};
		$funct_name = uc($syscalls{$syscall_num}) . "_FUNCT";
		$identified++;
		$cnt=1;
		$uniq=0;
		while($uniq == 0) {
			if(!$funct_hash{$funct_name}) {
				$funct_hash{$funct_name} = $int_addr;
				$uniq=1;
			} else {
				$cnt++;
				if($funct_name=~/(\d*)$/) {
					$funct_name=~s/(\d*)$/$cnt/;
				} else {
					$funct_name.=$cnt;
				}
			}
		}
   }
   @all_functs = sort keys %function; 
   foreach $funct_name (keys %funct_hash) {
   	$last_addr=0;
	foreach $funct_addr (@all_functs) {
		if (hex $funct_addr > hex $funct_hash{$funct_name}) {
			$function{$last_addr} = $funct_name;
			# This entry is for cross-referencing
			$interrupts{$funct_hash{$funct_name}}{"FUNCT"} = $last_addr;
		} else {
			$last_addr = $funct_addr;
		}
	}
   }
   print "done.\n" if $verbose;
   return $identified;
}

# Sorts and numbers functions
sub init_function_names {
   my @sorted_addrs;
   my $addr;
   my $funct_cnt;

   @sorted_addrs = sort keys %function;
   foreach $addr (@sorted_addrs) {
   	$funct_cnt++;
	$function{$addr}.="$funct_cnt" if $function{$addr}=~/FUNCTION/;
   }
   return $funct_cnt;
}

# Finds a value moved into AX
sub locate_register_state {
    my $line = shift;
    if ($line=~/mov(\s*)\$0x(\w*),\%eax/) {
    	$registers{"AX"}=$2;
    }
    if ($line=~/mov(\s*)\$0x(\w*),\%ebx/) {
	$registers{"BX"}=$2;
    }
    if ($line=~/mov(\s*)\$0x(\w*),\%ecx/) {
	$registers{"CX"}=$2;
    }
    if ($line=~/mov(\s*)\$0x(\w*),\%edx/) {
	$registers{"DX"}=$2;
    }
}

# Finds push statements
sub locate_stack_calls {
  my $line = shift;
  if ($line=~/push(\s*)\$0x(\w*)/) {
	push @{$registers{"STACK"}}, $2;
  } elsif($line=~/push(\s*)\%e(\w*)/) {
	push @{$registers{"STACK"}}, uc($2);
  } elsif($line=~/pushl(\s*)0x(\w*)/) {
  	push @{$registers{"STACK"}}, $2;
  } elsif($line=~/pop(\s*)/) {
	pop @{$registers{"STACK"}};
  }
}

# Finds interrupt calls and matching AX values
sub locate_interrupts {
   my $line = shift;
   my $int_addr;
   my $interrupt;
	if($line=~/(\w*):(.*)int(\s*)\$0x(\w*)/) {
		$int_addr=$1;
		$interrupt=$4;
		$interrupts{$int_addr}{"INT"}=$interrupt;
		$interrupts{$int_addr}{"AX"}=$registers{"AX"};
		$interrupts{$int_addr}{"BX"}=$registers{"BX"};
		$interrupts{$int_addr}{"CX"}=$registers{"CX"};
		$interrupts{$int_addr}{"DX"}=$registers{"DX"};
		# Clear out the registers now
		$registers{"AX"}="";
		$registers{"BX"}="";
		$registers{"CX"}="";
		$registers{"DX"}="";
	}
}

# Loads up the stack_state based on calls
sub set_stack_state {
   my $addr = shift;
   my $value;
   my $string = "";
   foreach  $value (@{$registers{"STACK"}}) {
	$string.="$value ";
   }
   $stack_state{$addr}=$string;
}

# locates possible functions in dump file
sub locate_functions {
   my $line = shift; 
   my $addr;
   my $call_addr;
   my $symbol;
   
   if($line=~/(\w*):(.*)call(\s*)0x(\w{7})/) {
   	$call_addr=$1;
	$addr=$4;
	$function{$addr}="FUNCTION" if !$function{$addr};
	set_stack_state($call_addr);
	@{$registers{"STACK"}} = undef;
   } elsif ($line=~/(\w*):(.*)call(\s*)(\w{7}) \</) {
   	# If the binary is not stripped it looks a bit different
	$addr=$4;
	$call_addr=$1;
	$function{$addr}="FUNCTION" if !$function{$addr};
	set_stack_state($call_addr);
	@{$registers{"STACK"}} = undef;
   } elsif ($line=~/(\w*) \<(\S*)\>:/) {
	# We get the symbols here so as not to get external references
	$addr=$1;
	# We need to remove the leading zero
	if (length($addr) >= 8) {
		$addr=substr($addr,1);
	}
	$symbol=uc($2);
	$function{$addr}=$symbol."_FUNCT";
   }
}

# Locates teh begining of .text and sets a flag
sub locate_text_section {
    my $line=$_;
    my $found=0;
    $found=1 if $line=~/^(\w*) \<\.text\>:/;
    $found=1 if $line=~/Disassembly of section \.text:/;
    return $found;
}

# Calls locate_X subroutines to locate sections in source
sub fact_finder {
   my $target = shift;
   my $line;
   my $total_lines;
   my $text_start;
   print "Parsing source for functions, interrupts, etc..." if $verbose;
   open DATA, "$target";
   while(<DATA>) {
	$line = $_;
	$total_lines++;
	locate_register_state($line);   # this must come first
	$text_start=1 if locate_text_section($line);
	locate_stack_calls($line) if $text_start;
	locate_functions($line);
	locate_interrupts($line);
   }
   close DATA;
   print "done.\n" if $verbose;
   return $total_lines;
}

sub parse_headers {
    my $OBJDUMP = shift;
    my $target = shift;
    my $line;
    my $len;
    my $start;
    my $name;
    my @headers;
    my $start_addr;
    print "Parsing header sections..." if $verbose;
    open HEADER, "$OBJDUMP -x --headers $target 2>/dev/null|" || die "Couldn't open $OBJDUMP --headers $target :$!";
    while(<HEADER>) {
	$line=$_;
	if ($line=~/^start address 0x(\w{8})/) {
		$start_addr=$1;
		# NOTE: This could be detected via .text VMA/LMA as well
		if(substr($start_addr,0,4) eq "0537") {
			print "\nWARNING:  Binary may be encrypted with burneye!\n";
			print "     You may need to use utilities like fenris or burndump\n";
			print "     to fully examine this file.  Use extreme caution.\n";
		}
	}
	if ($line=~/(\d*) \.(\S*)(\s*)(\w*)(\s*)(\w*)/) {
		$name = $2;
		$len = hex $4;
		$start = hex $6;
		push @headers, $name;
		$header{$name}{"START"}=$start;
		$header{$name}{"END"}=$start + $len;
	}
    }
    close HEADER;
    print "done.\n" if $verbose;
    return @headers;
}

# Attempts to locate functions based on MD5 sums with dress(1) from fenris
sub dress_binary {
    my $DRESS=shift;
    my $target=shift;
    my $line;
    my $err;
    my $basef=basename($target);
    my $newtarget="$work_dir/$basef.dressed";;
    print "Dressed binary as $newtarget.\n" if $verbose;
    unlink $newtarget if -r $newtarget;
    $err=system("$DRESS $target $newtarget &>/dev/null");
    return $newtarget if !$err;
    print "failed to dress binary.\n" if $verbose;
    return $target;
}

# If the file is dynamically linked index the symbols into the function hash
sub dump_dyn_symbol_table {
     my $OBJDUMP=shift;
     my $target=shift;
     my $line;
     my $addr;
     my $symbol;

     open TARGET, "$OBJDUMP -T $target|";
     while(<TARGET>) {
        $line=$_;
	$addr=""; $symbol="";
	if($line=~/(\w{8})(\s*)DF (\S{5})(\s*)(\w{8})(\s*)GLIBC_2.(\d*)(\s*)(\S*)/) {
		$addr=$1;
		$symbol=$9;
	} elsif($line=~/(\w{8})(\s*)DF (\S{5})(\s*)(\w{8}) (\S*)/) {
		$addr=$1;
		$symbol=$6;
	}
	if($addr && $symbol) {
		$addr=substr($addr,1);
		$function{$addr}=uc($symbol)."_FUNCT";
	}
     }
     close TARGET;
}

# Usage error=dump_binary("objdump", "target_file");
sub dump_binary {
   my $OBJDUMP= shift;
   my $target = shift;
   my $outfile = basename($target);
   my @headers = @_;
   my $err=0;
   print "Creating original dump file $work_dir/$outfile.dump..." if $verbose;
   if($err=system("$OBJDUMP -d $target > $work_dir/$outfile.dump")) {
	print "failed.\n" if $verbose;
	return $err;
   }
   print "done.\n" if $verbose;
   if($options{"H"}) {
     foreach my $section (@headers) {
   	print "Creating dumpfile of contents in $work_dir/$outfile.$section..." if $verbose;
   	if($err=system("$OBJDUMP -j .$section -s $target &> $work_dir/$outfile.$section")) {
		print "failed.\n" if $verbose;
   	} else {
   		print "done.\n" if $verbose;
	}
     }
   }
   return $err;
}

# Gets stack state and sets comment for functions
sub comment_stack_state {
   my $addr = shift;
   my $funct_addr = shift;
   my $comment = "";
   my $var_addr;
   my @values;
   my $value;
   my $count=0;
   my $num_args = -1;
   if($stack_state{$addr}) {
	@values=reverse(split(' ',$stack_state{$addr}));
	# Check for known functions
	if($function{$funct_addr}!~/FUNCTION(\d*)/ && $#values != -1) {
		## LIBC_START_MAIN is special
		if($function{$funct_addr} eq "__LIBC_START_MAIN_FUNCT") {
			$function{$values[0]} = "_START_MAIN_FUNCT";
		}
		if($function{$funct_addr} eq "CLOSE_FUNCT") {
			$values[$#values]="STDIN" if $values[$#values] == 0;
			$values[$#values]="STDOUT" if $values[$#values] == 1;
			$values[$#values]="STDERR" if $values[$#values] == 2;
			$num_args=1;
		} elsif($function{$funct_addr} eq "FOPEN_FUNCT") {
			$num_args=2;
		} elsif($function{$funct_addr} eq "BZERO_FUNCT") {
			$num_args=2;
		} elsif($function{$funct_addr} eq "SOCKET_FUNCT") {
			$values[0]="PF_UNSPEC" if $values[0] == 0;
			$values[0]="PF_LOCAL" if $values[0] == 1;
			$values[0]="PF_INET" if $values[0] == 2;
			$values[0]="PF_IPX" if $values[0] == 4;
			$values[0]="PF_INET6" if hex $values[0] == 10;
			$values[0]="PF_NETLINK" if hex $values[0] == 16;
			$values[1]="SOCK_STREAM" if $values[1] == 1;
			$values[1]="SOCK_DGRAM" if $values[1] == 2;
			$values[1]="SOCK_RAW" if $values[1] == 3;
			$values[2]="PROTO_IP" if hex $values[2] == 0;
			$values[2]="PROTO_ICMP" if hex $values[2] == 1;
			$values[2]="PROTO_TCP" if hex $values[2] == 6;
			$values[2]="PROTO_UDP" if hex $values[2] == 17;
			if($values[2]!~/PROTO_/) {
				$values[2]="PROTO_".hex $values[2];
			}
			$num_args=3;
		} elsif($function{$funct_addr} eq "GETEUID_FUNCT") {
			$num_args=0;
		}
		if($num_args>=0) {
			$#values=$num_args-1;
		}
	}
	$comment="(";
	foreach $value (@values) {
		if($value=~/(\w{7})/) {
		   $var_addr=$1;
		   if($constants{$var_addr} eq "rodata") {
		   	$comment.="\"".return_rostring($var_addr,14)."\"";
		   } elsif ($constants{$var_addr} eq "data") {
		   	if($var_addr=data_ref_pointer($var_addr)) {
				$comment.="\"".return_rostring($var_addr,14)."\"";
			}
		   } else {
		  	$comment.="$value";
		   }
		} else {
		   $comment.="$value";
		}
		$count++;
		$comment.="," if ($count <= $#values);
	}
	$comment.=")";
   }
   return $comment;
}

# Merge function list into dump file
sub merge_function_list {
   my $line = shift;;
   my $output;
   my $funct_addr;
   my $call_addr;
	if($line=~/(\w*):(.*)call(\s*)0x(\w{7})/ ||
	   $line=~/(\w*):(.*)call(\s*)(\w{7}) \</) {
	   	$call_addr=$1;
		$funct_addr=$4;
		if($function{$funct_addr}) {
			$output.="$COMMENT CALL " . $function{$funct_addr}.
			         comment_stack_state($call_addr,$funct_addr).
				 "\n";
		}
		$output.=$line;
	} elsif ($line=~/(\w*):/) {
		$funct_addr=$1;
		if($function{$funct_addr}) {
			$output.="$COMMENT [".$function{$funct_addr}."]\n";
		}
		$output.=$line;
	} else {
		$output.=$line;
	}
   return $output;
}

# Simply adds an extra \n after a 'ret' call
sub comment_ret {
  my $line = shift;
  if ($line=~/(\w*):(.*)(\s*)ret/) {
	$line.="\n";
  }
  return $line;
}

# Checks for lines that need to be commented
sub comment_lines {
   my $target = shift;
   my $output_file = shift;
   my $line;
   print "Commenting functions and constants calls..." if $verbose;
   open TARGET, "$target" || warn "$target: $!";
   $output_file = "$target.commented" if !$output_file;
   open TMP, ">$output_file" || warn "Error creating $output_file: $!";
   print TMP "$COMMENT Assembler source was auto-commented with the Examiner v$VERSION\n";
   print TMP "$COMMENT http://AcademicUnderground.org/examiner/\n";
   while(<TARGET>) {
	$line=$_;
	check_constants($line);
	$line=merge_function_list($line);
	$line=comment_rodata($line) if @rodata;
	$line=comment_ret($line);
	print TMP $line;
   }
   close TMP;
   close TARGET;
   print "done.\n" if $verbose;
}

# Writes the interrupts hash to disk
sub create_int_references {
   $target = shift;
   my $line;
   my $int_addr;
   print "Writing interrupts and registers to $target..." if $verbose;
   open INTS, ">$target";
   foreach $int_addr (keys %interrupts) {
	$line="$int_addr: Int(".$interrupts{$int_addr}{"INT"}.") ";
	$line.="AX(".$interrupts{$int_addr}{"AX"}.") " if $interrupts{$int_addr}{"AX"};
	$line.="BX(".$interrupts{$int_addr}{"BX"}.") " if $interrupts{$int_addr}{"BX"};
	$line.="CX(".$interrupts{$int_addr}{"CX"}.") " if $interrupts{$int_addr}{"CX"};
	$line.="DX(".$interrupts{$int_addr}{"DX"}.") " if $interrupts{$int_addr}{"DX"};
	$line.="\n";
	print INTS $line;
   }
   close INTS;
   print "done.\n" if $verbose;
}

# Creates the section boundry list
sub create_section_boundry {
  my $target = shift;
  my $section;
  my $hex_start;
  my $hex_end;
  print "Writing section boundries to disk..." if $verbose;
  open SECTIONS, ">$target" || warn "Couldn't create $target : $!";
  print SECTIONS "SECTION\t\t \tSTART           END\n";
  foreach $section (keys %header) {
  	$hex_start=sprintf("%x",$header{$section}{"START"});
	$hex_end=sprintf("%x",$header{$section}{"END"});
	print SECTIONS "$section\t\t:\t".$header{$section}{"START"}." - ".$header{$section}{"END"}." ($hex_start - $hex_end)\n";
  }
  close SECTIONS;
  print "done.\n" if $verbose;
}

# Updates function list given the function hash
sub update_function_list {
   my $target = shift;
   my $line;
   my @funct_addrs;
   my $funct_addr;
   print "Updating function list on disk ($target)..." if $verbose;
   @funct_addrs = sort keys %function;
   open TARGET, ">$target";
   foreach $funct_addr (@funct_addrs) {
	print TARGET "$funct_addr - ". $function{$funct_addr}."\n";
   }
   close TARGET;
   print "done.\n" if $verbose;
}

# finds address locations and reports what section
sub locate_section {
   my $addr = hex shift;
   my $section;
   foreach $section (keys %header) {
   	next if $header{$section}{"START"} == 0;
	if(($addr >= $header{$section}{"START"}) &&
	   ($addr < $header{$section}{"END"})) {
		return $section;
	   }
   }
}

# Returns an address pointer from .data pointing to .rodata
sub data_ref_pointer {
  my $addr = shift;
  my $start = (hex $addr) - $header{"data"}{"START"};
  my $newaddr;
  if($start < 0 || $start > $header{"data"}{"END"}) {
	return;
  }
  $start=$start*2; # Becuase data is in ASCII
  $newaddr=substr($data_section,$start,8);
  $newaddr=~s/(\w{2})(\w{2})(\w{2})(\w{2})/$4$3$2$1/;
  if((hex $newaddr) > $header{"rodata"}{"START"} &&
     (hex $newaddr) < $header{"rodata"}{"END"}) {
	return $newaddr;
  }
}

# Returns strings from rodata
sub return_rostring {
   my $addr = shift;
   my $maxlen = shift;
   my $string;
   my $byte;
   my $start;
   my $done;
   my $temp;
   my $initial_start;
   $start = (hex $addr) - $header{"rodata"}{"START"};
   # Sanity Check
   if($start < 0 || $start > $header{"rodata"}{"END"}) {
	return "EXAMINIER: BAD REFERENCE: $addr";
   }
   $initial_start = $start;
   while(!$done && $start < $header{"rodata"}{"END"}) {
   	$temp=uc(unpack("H2", $rodata[$start]));
	if($temp ne "00") {
		# Some check for special chars
		if($temp eq "0A") {
			$string.="\\n";
		} elsif($temp eq "0D") {
			$string.="\\r";
		} elsif($temp eq "07") {
			$string.="\\a";
		} elsif($temp eq "08") {
			$string.="\\b";
		} elsif($temp eq "09") {
			$string.="\\t";
		} elsif($temp eq "0B") {
			$string.="\\v";
		} elsif($temp eq "0C") {
			$string.="\\f";
		} elsif($temp gt "7F" || $temp lt "20") {
			$string.="\\x$temp";
		} else {
			$string.=$rodata[$start];
		}
		$start++;
		#Ensure code isn't too long
		if((($start-$initial_start) > $maxlen) &&
		   !$options{"L"}) {
			$string.=" ...";
			$done=1;
		}
	} else {
		$done=1;
	}
   }
   return $string;
}

sub usage {
   print "the Examiner version $VERSION\n";
   print "$0 [options] -x <file_to_dissassemble>\n";
   print "\n";
   print "      -x file    File to Dissassemble (Mandatory)\n";
   print "      -o file    Output file for commented source\n";
   print "      -v         Increase verbosity\n";
   print "      -s         Provide summary information\n";
   print "      -d dir     Specify data directory for files\n";
   print "                 (default: \$HOME/examiner-data or \$TCT_HOME)\n";
   print "      -c char    Specify a comment character (default: '$COMMENT')\n";
   print "      -H         Dump headers in hex to files\n";
   print "      -R         Create additional reference files\n";
   print "      -L         Ignore string constant size limitations (Dangerous)\n";
   print "      -C file    Path to TCT's coroner.conf file (optional)\n";
   print "      -q         Quite.  Supress output of phases\n";
   print "      -V         Print Version info and exit\n";
   print "      -h         This help screen\n";
   print "\n";
   print "  Disassembles binary data and creates some additional files that\n";
   print "  may be helpfull for reverse-engineering further data.\n";
   print "\n(c) 2002 by Craig Smith under the terms of the GPL\n";
   exit(1);
}

# Prints the function list from memory...used for debugging
sub print_functions {
   my $funct_addr;
   print "---[Function List]----\n";
   foreach $funct_addr (keys %function) {
	print "function{$funct_addr}=".$function{$funct_addr}."\n";
   }
   print "----------------------\n";
}

# Usage: init_type(<results from file cmd>);
# Determines OS type and setups any vars needed
sub init_type {
    my $file_type = shift;
    my $OS="";
    if ($file_type=~/ELF 32-bit LSB executable, Intel/) {
    	if($file_type=~/\((\w*)\)/) {
		$OS="$1 x86";
	}
	if($file_type=~/dynamic/) {
		$OS.= " dynamic";
	}
	if($file_type!~/not stripped/) {
		$OS.=" (stripped)";
	}
    } elsif ($file_type=~/NetBSD/ || $file_type=~/OpenBSD/) {
	if($file_type=~/(\w*)\/i386/) {
		$OS="$1 x86";
	}
    }
    return $OS;
}
