<?php
// Usage: php csv2sqlite.php [ <source> [ <dest> ] ]

// The whole conversion goes through PDO's SQLite driver, so bail out
// right away when that driver is not available.
$hasSqliteDriver = extension_loaded( "pdo_sqlite" );
if ( $hasSqliteDriver === false ) {
	exit( "ERROR: script needs pdo_sqlite extension to run\n" );
}

// Default input/output files; the first CLI argument overrides the source
// and the second one overrides the destination.
$source = "ip-to-country.csv";
$dest = "ipdb.db";
if ( $argc > 1 ) {
	$source = $argv[1];
	if ( $argc > 2 ) {
		$dest = $argv[2];
	}
}

// realpath() returns false for a path that does not exist, so the result
// has to be checked before it is used as a file name.
$sourcePath = realpath( $source );
if ( $sourcePath === false ) {
	exit( "ERROR: Source file not found: $source\n" );
}
$source = $sourcePath;

// The destination file itself may not exist yet, so only its directory is
// resolved. Without this check a missing directory would turn $dest into
// "<separator><basename>", i.e. a file in the filesystem root.
$destDir = realpath( dirname( $dest ) );
if ( $destDir === false ) {
	exit( "ERROR: Destination directory does not exist: ".dirname( $dest )."\n" );
}
// rtrim avoids a doubled separator when the directory is a root ("/" or "C:\")
$dest = rtrim( $destDir, DIRECTORY_SEPARATOR ).DIRECTORY_SEPARATOR.basename( $dest );
echo "Source: $source\nDest: $dest\n";

// Load the whole source file into memory, one array entry per line with
// the line endings stripped. A failed read and an empty file are both
// treated as an unreadable source.
$lines = file( $source, FILE_IGNORE_NEW_LINES );
if ( $lines == false ) {
	exit( "\nERROR: Could not read source file.\n" );
}

// The database is always rebuilt from scratch, so a destination file left
// over from a previous run is deleted first. The warning unlink() would
// emit is suppressed because the failure is reported right below.
if ( file_exists( $dest ) ) {
	echo "Removing old destination database...";
	$removed = @unlink( $dest );
	if ( $removed === false ) {
		exit( "\n\nERROR: Cant remove old destination file.\n" );
	}
	echo "done\n";
}

// Build the SQLite database: create the schema, seed the special country
// rows, then import every usable line of $lines. Any PDOException aborts
// the script with the exception text.
try {
	// create database
	echo "Creating database...";
	$pdo = new PDO( "sqlite:".$dest );
	$pdo->setAttribute( PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION );
	// no syncing and no rollback journal while importing
	$pdo->exec( "PRAGMA synchronous = OFF" );
	$pdo->exec( "PRAGMA journal_mode = OFF" );

	// create schema
	$pdo->exec( "CREATE TABLE ip2country (
					ip_from INTEGER,
					ip_to INTEGER,
					country_code TEXT
				)" );
	$pdo->exec( "CREATE TABLE countries (
					country_code TEXT PRIMARY KEY,
					country_name TEXT
				)" );
	// indexes for faster search
	// the downside is that the resulting db is 2.5 times bigger :/
	$pdo->exec( "CREATE INDEX ix_ip_from ON ip2country( ip_from )" );
	$pdo->exec( "CREATE INDEX ix_ip_to ON ip2country( ip_to )" );
	echo "done\n";

	// number of dots the loading "bar" prints for a complete import
	define( "LOADING_LENGTH", 50 );
	echo "Inserting records";

	// setup prepared statements
	$ip2countryStmt = $pdo->prepare( "INSERT INTO ip2country VALUES ( ?, ?, ? )" );
	// OR IGNORE: the same country appears on many lines, only the first insert counts
	$countriesStmt = $pdo->prepare( "INSERT OR IGNORE INTO countries VALUES ( ?, ? )" );

	// insert dummy country
	// mod will search for this record and will be used when location is unknown
	//FIXME: forcing uppercase
	$countriesStmt->execute( array( 'XX', strtoupper( 'Unknown Location' ) ) );

	// some unofficial and missing country codes too
	// added so flags can be used
	//FIXME: forcing uppercase
	$countriesStmt->execute( array( 'CX', strtoupper( 'Christmas Island' ) ) );
	$countriesStmt->execute( array( 'EU', strtoupper( 'European Union' ) ) );
	$countriesStmt->execute( array( 'ENG', strtoupper( 'England' ) ) );
	$countriesStmt->execute( array( 'SCO', strtoupper( 'Scotland' ) ) );
	$countriesStmt->execute( array( 'WAL', strtoupper( 'Wales' ) ) );
	$countriesStmt->execute( array( 'NIR', strtoupper( 'North Ireland' ) ) );

	// insert lines
	$cols = 0;		// number of quoted fields per record, detected from the first usable line
	$count = count( $lines );
	$percent = 0;	// progress value at which the last dot was printed
	$i = 0;
	$inserted = 0;
	foreach ( $lines as $line ) {
		// update loading "bar"
		$j = (int)( ++$i / $count * LOADING_LENGTH );
		if ( $j > $percent ) {
			$percent = $j;
			echo ".";
		}

		// blank lines and lines starting with # or // are ignored
		// (the empty check also keeps $line[0] from reading a missing offset)
		if ( $line === '' || $line[0] === '#' || strncmp( $line, '//', 2 ) === 0 ) {
			continue;
		}

		// collect every double-quoted field of the line, in order
		if ( !preg_match_all( '/"(.*?)"/', $line, $matches ) ) {
			continue;
		}
		$values = $matches[1];

		// the column count is taken from the first line that has at least 4 fields
		if ( $cols < 4 ) {
			$cols = count( $values );
			if ( $cols < 4 ) {
				continue;
			}
		}
		// all offsets below are derived from $cols, so a line with fewer
		// fields than the detected format cannot be parsed
		if ( count( $values ) < $cols ) {
			continue;
		}

		// some checks
		$ip_from = $values[0];
		$ip_to = $values[1];
		if ( !ctype_digit( $ip_from ) || !ctype_digit( $ip_to ) ) {
			continue;
		}
		// the code is the field before the name when that field is 2 chars long;
		// otherwise (presumably a 3-letter code sits there) it is one field earlier
		$codeIndex = ( $cols == 4 || strlen( $values[ $cols - 2 ] ) == 2 ) ? $cols - 2 : $cols - 3;
		$country_code = strtoupper( $values[ $codeIndex ] );
		// 00 and ZZ are skipped like malformed codes (presumably "unassigned" markers in the source data)
		if ( strlen( $country_code ) != 2 || $country_code === '00' || $country_code === 'ZZ' ) {
			continue;
		}
		// the country name is always the last field
		$country_name = $values[ $cols - 1 ];
		if ( $country_name === '' || in_array( $country_code, array( 'AX', 'YU', 'CS', 'GB' ), true ) ) {
			// missing country names :/
			// (AX, YU, CS and GB are remapped even when the file provides a name)
			switch( $country_code ) {
			case 'ME':
				$country_name = 'Montenegro';
				break;
			case 'YU':	// former Yugoslavia
			case 'CS':	// former Serbia and Montenegro (the code was used for Czechoslovakia before that)
			case 'RS':
				$country_code = 'RS';
				$country_name = 'Serbia';
				break;
			case 'MF':
				$country_name = 'Saint Martin';
				break;
			case 'JE':
				$country_name = 'Jersey';
				break;
			case 'IM':
				$country_name = 'Isle of Man';
				break;
			case 'GG':
				$country_name = 'Guernsey';
				break;
			case 'AX':
				$country_name = 'Aland Islands';
				break;
			case 'GB':	// it's the official but they use UK.. we map it to a single image
				// only the code changes, the name from the file is kept
				$country_code = 'UK';
				break;
			default:
				echo "\nUnknown country code: $country_code";
				$country_name = 'Unknown Location';
			}
		}

		// insert the record
		$ip2countryStmt->execute( array( $ip_from, $ip_to, $country_code ) );
		$countriesStmt->execute( array ( $country_code, strtoupper( $country_name ) ) );	//FIXME: forcing uppercase
		$inserted++;
	}

	echo "done\n\nTotal lines: $count, Inserted: $inserted, Ignored: ".( $count - $inserted );
} catch( PDOException $e ) {
	exit( "\n\nERROR: $e\n" );
}

?>
