/*
--             This file is part of the New World OS project
--                 Copyright (C) 2006-2009  QRW Software
--           J. Scott Edwards - j.scott.edwards.nwos@gmail.com 
--                      http://www.qrwsoftware.com
--                      http://nwos.sourceforge.com
--
--   This program is free software: you can redistribute it and/or modify
--   it under the terms of the GNU General Public License as published by
--   the Free Software Foundation, either version 3 of the License, or
--   (at your option) any later version.
--
--   This program is distributed in the hope that it will be useful,
--   but WITHOUT ANY WARRANTY; without even the implied warranty of
--   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--   GNU General Public License for more details.
--
--   You should have received a copy of the GNU General Public License
--   along with this program, in the file LICENSE.  If not, see 
--   <http://www.gnu.org/licenses/>.
--
--   You can also contact me via paper mail at:
--
--      QRW Software
--      P.O. Box 27511
--      Salt Lake City, UT 84127-0511, USA.
--
--
-- $Log: compress_sparse.c,v $
-- Revision 1.35  2009/07/12 01:46:51  jsedwards
-- Changed to calculate total blocks from total chunks instead of using
-- nwos_total_private_blocks and fixed print statement by adding 'll'.
--
-- Revision 1.34  2009/07/10 10:19:10  jsedwards
-- Add code to set the new type_code member of the disk header to "cmpr".
--
-- Revision 1.33  2009/07/02 13:02:54  jsedwards
-- Changed to not copy the header into a disk header structure, instead pass
-- the buffer to the nwos_load_private_data function which now copies the
-- header to the private disk header.
--
-- Revision 1.32  2009/06/30 13:43:59  jsedwards
-- Changed to use new disk header functions in the header.c file instead of
-- having them duplicated in this file.
--
-- Revision 1.31  2009/04/21 12:02:07  jsedwards
-- Change to byte swap chunk_info references as 64 bits and indexes as 32 bits.
--
-- Revision 1.30  2009/04/06 14:13:15  jsedwards
-- Merged in changes from CVS branch_0030_new_chunk_info branch, see revisions
-- 1.25.2.1 and 1.25.2.2 below.
--
-- Revision 1.29  2009/03/14 23:14:04  jsedwards
-- Added include of the new chunk_info.h file.
--
-- Revision 1.28  2009/03/14 11:46:07  jsedwards
-- Added include of user_config.h file.
--
-- Revision 1.27  2009/03/13 12:15:58  jsedwards
-- Added includes for config.h, log.h, and progress_bar.h files.
--
-- Revision 1.26  2009/03/08 00:06:18  jsedwards
-- Changed include objectify_private.h to disk_io.h.
--
-- Revision 1.25.2.2  2008/08/15 13:30:51  jsedwards
-- Changed for new Disk_Header with total_chunks instead of total_blocks and
-- new bit maps preceding the 16 megabyte chunk.
--
-- Revision 1.25.2.1  2008/08/11 13:44:43  jsedwards
-- Changed to swap new 32-bit "flags_used" member of the chunk_info table
-- instead of the old "used" member.
--
-- Revision 1.25  2008/08/10 15:23:24  jsedwards
-- Added path to "Missing magic number" and "Incorrect version in header" error
-- messages.
--
-- Revision 1.24  2008/07/19 15:04:13  jsedwards
-- If 0'd out printing of every block's reference ID so it doesn't spew every
-- block ID to the screen anymore.  Added calls to print a progress bar instead.
--
-- Revision 1.23  2008/04/28 12:48:55  jsedwards
-- Added code to check for blocks where the reference ID is zero and skip over
-- them with a warning.
--
-- Revision 1.22  2008/03/12 03:56:38  jsedwards
-- Changed to use the GNU MD5 context and functions instead of the RSA context
-- and functions.
--
-- Revision 1.21  2008/02/03 01:15:40  jsedwards
-- Change to use nwos_get_private_objects_path function instead of DEFAULT_FILE.
--
-- Revision 1.20  2007/08/10 00:03:35  jsedwards
-- Removed definition of _LARGEFILE64_SOURCE, now using _FILE_OFFSET_BITS=64.
-- Also removed using O_LARGEFILE from open call.
--
-- Revision 1.19  2007/08/02 18:51:42  jsedwards
-- Change to use the new index in the chunk_info table to compute the address
-- of the chunk in disk storage.
--
-- Revision 1.18  2007/07/15 18:32:34  jsedwards
-- Fix screwup on previous checkin (left function bswap_uint32 in).
--
-- Revision 1.17  2007/07/15 17:36:25  jsedwards
-- Changed to use WORDS_BIGENDIAN instead of __BYTE_ORDER == __LITTLE_ENDIAN
-- to determine endianness and byteswap_uint16 and 32 functions in objectify.h
-- instead of bswap_16 and 32 to make more platform independent.
--
-- Revision 1.16  2007/07/01 19:44:11  jsedwards
-- Upgrade to GPLv3.
--
-- Revision 1.15  2007/06/28 18:50:25  jsedwards
-- Modified for 0023 disk layout.
--
-- Revision 1.14  2007/06/21 16:25:24  jsedwards
-- Changed so that if you don't specify an output file it just computes the
-- MD5 and SHA1 checksums and outputs them.
--
-- Revision 1.13  2007/06/21 15:12:51  jsedwards
-- Added calculation of MD5 and SHA1 checksums of output file.
--
-- Revision 1.12  2007/06/20 00:29:24  jsedwards
-- Include block_offset_to_chunks in calculations so all chunks get stored.
--
-- Revision 1.11  2007/06/19 18:58:53  jsedwards
-- Remove stuff for public blocks because they are now stored separately from
-- the private objects.
--
-- Revision 1.10  2007/03/03 13:46:25  jsedwards
-- Added code to keep a count of distribution of disk blocks over the 32-bit
-- range and print them at the end.  This shows how the randomness is
-- distributed.
--
-- Revision 1.9  2007/02/11 15:15:20  jsedwards
-- Change 'sprintf' calls to 'snprintf' calls so the OpenBSD linker will stop
-- whining.
--
-- Revision 1.8  2007/02/11 14:41:26  jsedwards
-- Change all 'off64_t' and 'lseek64' references to 'off_t' and 'lseek',
-- because BSD doesn't dig the whole brain damaged 64 bit thing.
--
-- Revision 1.7  2007/01/09 13:17:02  jsedwards
-- Fix indexes when printing version string.
--
-- Revision 1.6  2007/01/09 13:07:13  jsedwards
-- Change to use Disk_Header structure.
--
-- Revision 1.5  2006/11/11 12:01:01  jsedwards
-- Update e-mail address to something that works.
--
-- Revision 1.4  2006/11/06 13:52:54  jsedwards
-- Changed to skip over public blocks for now.
--
-- Revision 1.3  2006/11/02 11:49:28  jsedwards
-- Fixed all cases where 'z' was used as a format for 'off64_t' values because
-- the older compiler complains.
--
-- Revision 1.2  2006/10/26 01:51:23  jsedwards
-- Merged alpha_05_branch back into main trunk.
--
-- Revision 1.1.2.8  2006/10/22 12:45:21  jsedwards
-- Change to use the number of blocks stored on disk instead of
-- BLOCKS_ON_DISK #define.
--
-- Revision 1.1.2.7  2006/10/22 12:40:47  jsedwards
-- Corrected version string error message.
--
-- Revision 1.1.2.6  2006/10/19 01:42:47  jsedwards
-- Fixed format specifiers for uint32, which is now an int instead of a long,
-- and off64_t.
--
-- Revision 1.1.2.5  2006/10/15 16:31:42  jsedwards
-- Changed to use block maps to find blocks to write instead of scanning
-- the entire drive.  Time reduced from 87 to 63 minutes.
--
-- Revision 1.1.2.4  2006/10/15 12:10:08  jsedwards
-- Change to read an entire chunk at a time and to skip over the block maps.
--
-- Revision 1.1.2.3  2006/09/19 14:11:57  jsedwards
-- Added printing of number of blocks.
--
-- Revision 1.1.2.2  2006/09/17 13:52:11  jsedwards
-- Fix argument count bug and open file read only instead of read-write.
--
-- Revision 1.1.2.1  2006/09/17 13:21:39  jsedwards
-- Program to compress the sparse objectify file into a non-sparse file.
--
*/


#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

#include "gnu/md5.h"
#include "gnu/sha1.h"

#include "chunk_info.h"
#include "config.h"
#include "disk_io.h"
#include "header.h"
#include "log.h"
#include "progress_bar.h"
#include "user_config.h"


/*
 * Write the command-line synopsis to stderr.
 *
 *   program - name to show in the synopsis (callers pass argv[0]).
 *
 * The output file argument is optional; the second line tells the user that
 * leaving it off only prints the checksums.
 */
static void print_usage(char *program)
{
    fprintf(stderr,
            "usage: %s [output-file]\n"
            " if no output file is specified it just outputs the checksums.\n",
            program);
}


#define SIZE_COUNTS 16

int main(int argc, char* argv[])
{
    int obj_file_desc;
    const char* obj_file_path;
    const char* error_msg;
    off_t chunk;
    uint8 block_map[BIT_MAP_BYTES];
    uint8 block[FILE_BLOCK_SIZE];
    int i;
    int j;
    size_t bytes_read;
    FILE* ofp = NULL;
    int num_blocks;
    char msg[128];
    uint32 counts[SIZE_COUNTS];
    uint32 ref;
    struct md5_ctx md5_context;    /* MD5 checksum context */
    struct sha1_ctx sha1_context;
    uint8 md5_digest[16];
    uint8 sha1_digest[20];
    int chunk_num;
    Chunk_Info* chunk_info;
    int blocks_with_bad_ids = 0;


    for (i = 0; i < SIZE_COUNTS; i++) counts[i] = 0;

    if (argc > 2)
    {
	print_usage(argv[0]);
	exit(1);
    }

    if (argc == 2 && *argv[1] == '-')
    {
	fprintf(stderr, "error: this program doesn't have any options\n");
	print_usage(argv[0]);
	exit(1);
    }


    /* Open the storage drive and verify the header info */

    nwos_log_arguments(argc, argv);

    obj_file_path = nwos_get_private_objects_path();

    obj_file_desc = open(obj_file_path, O_RDONLY);

    if (obj_file_desc < 0)
    {
	perror(obj_file_path);
	exit(1);
    }

    bytes_read = read(obj_file_desc, block, sizeof(block));

    if (bytes_read != sizeof(block))
    {
	perror("reading first block");
	exit(1);
    }

    error_msg = nwos_load_private_data(block, sizeof(block), false);   /* don't allow compressed files */

    if (error_msg != NULL)
    {
	fprintf(stderr, "%s: %s\n", error_msg, obj_file_path);
	exit(1);
    }

    assert(nwos_used_private_chunks > 0);

    chunk_info = malloc(nwos_used_private_chunks * sizeof(Chunk_Info));
    assert(chunk_info != NULL);

    bytes_read = read(obj_file_desc, chunk_info, nwos_used_private_chunks * sizeof(Chunk_Info));

    if (bytes_read != nwos_used_private_chunks * sizeof(Chunk_Info))
    {
	perror("reading chunk info");
	exit(1);
    }

    /* fix the byte order on little endian machines */
#ifndef WORDS_BIGENDIAN
	{
	  int i;
	  for (i = 0; i < nwos_used_private_chunks; i++)
	  {
	      chunk_info[i].ref = byteswap_uint64(chunk_info[i].ref);
	      chunk_info[i].flags_used = byteswap_uint32(chunk_info[i].flags_used);
	      chunk_info[i].index = byteswap_uint32(chunk_info[i].index);
	  }
	}
#endif

    if (argc == 2)
    {
	ofp = fopen(argv[1], "w");

	if (ofp == NULL)
	{
	    perror(argv[1]);
	    exit(1);
	}
    }

    md5_init_ctx(&md5_context);   /* initialize the MD5 checksum context */
    sha1_init_ctx(&sha1_context);

    printf("header: %c%c%c%c %c%c%c%c\n",
	   nwos_private_disk_header.magic_number[0], nwos_private_disk_header.magic_number[1], nwos_private_disk_header.magic_number[2], nwos_private_disk_header.magic_number[3], 
	   nwos_private_disk_header.version_string[0], nwos_private_disk_header.version_string[1], nwos_private_disk_header.version_string[2], nwos_private_disk_header.version_string[3]);

    printf("total blocks on disk: %llu  chunks_used: %d\n", (uint64)nwos_total_private_chunks * USABLE_BLOCKS_PER_CHUNK, nwos_used_private_chunks);
    fflush(stdout);

    memcpy(&(((Disk_Header*)block)->type_code), TYPE_CODE_COMPRESSED, sizeof((((Disk_Header*)block)->type_code)));

    /* write the first 256 bytes always */
    if (ofp != NULL && fwrite(block, 1, sizeof(block), ofp) != sizeof(block))
    {
	perror(argv[1]);
	close(obj_file_desc);
	exit(1);
    }

    md5_process_bytes(block, sizeof(block), &md5_context);    /* include this data in the md5 checksum */
    sha1_process_bytes(block, sizeof(block), &sha1_context);    /* include this data in the sha1 checksum */

    num_blocks = 0;

    nwos_start_progress_bar();

    // for now we can skip over public blocks because they should always stay the same
    for (chunk_num = 0; chunk_num < nwos_used_private_chunks; chunk_num++)
    {
	nwos_update_progress_bar((float)num_blocks / (float)nwos_used_private_blocks);

	chunk = nwos_block_offset_to_chunks + chunk_info[chunk_num].index * BLOCKS_IN_CHUNK;

	if (lseek(obj_file_desc, chunk << 8, SEEK_SET) < 0)
	{
	    snprintf(msg, sizeof(msg), "lseek chunk:%08x", (uint32)chunk);
	    perror(msg);
	    exit(1);
	}

	bytes_read = read(obj_file_desc, block_map, sizeof(block_map));

	if (bytes_read != sizeof(block_map))
	{
	    snprintf(msg, sizeof(msg), "reading block map: %u", (uint32)chunk);
	    perror(msg);
	    exit(1);
	}

	for (i = 0; i < BLOCKS_IN_CHUNK; i++)
	{
	    if ((block_map[i/8] & (0x80 >> (i%8))) != 0)
	    {
		if (lseek(obj_file_desc, (chunk + BIT_MAP_BLOCKS + i) << 8, SEEK_SET) < 0)
		{
		    snprintf(msg, sizeof(msg), "lseek block:%08x", (uint32)(chunk + i));
		    perror(msg);
		    exit(1);
		}

		bytes_read = read(obj_file_desc, block, sizeof(block));

		if (bytes_read != sizeof(block))
		{
		    snprintf(msg, sizeof(msg), "reading block: %u", (uint32)(chunk + i));
		    perror(msg);
		    exit(1);
		}
		
		ref = (uint32)block[4] << 24 | (uint32)block[5] << 16 | (uint32)block[6] << 8 | (uint32)block[7];

		if (ref == 0)
		{
		    blocks_with_bad_ids++;

		    printf("WARNING: reference in block is zero, not written:\n");

		    for (j = 0; j < FILE_BLOCK_SIZE; j += 16)
		    {
			printf("%02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x\n",
			       block[j+0],  block[j+1],  block[j+2],  block[j+3],
			       block[j+4],  block[j+5],  block[j+6],  block[j+7],
			       block[j+8],  block[j+9],  block[j+10], block[j+11],
			       block[j+12], block[j+13], block[j+14], block[j+15]);
		      }
		}
		else
		{
#if 0
		    /* save this for verbose mode? */
		    printf("id: %08x  block: %08x\n", ref, (uint32)chunk+i);
		    /* printf("id: %08x\n", ref); */
		    fflush(stdout);
#endif
		    counts[ref >> 28]++;

		    if (ofp != NULL && fwrite(block, 1, sizeof(block), ofp) != sizeof(block))
		    {
			perror(argv[1]);
			close(obj_file_desc);
			exit(1);
		    }

		    md5_process_bytes(block, sizeof(block), &md5_context);    /* include this data in the md5 checksum */
		    sha1_process_bytes(block, sizeof(block), &sha1_context);    /* include this data in the sha1 checksum */
		    num_blocks++;
		}
	    }
	}
    }

    nwos_finish_progress_bar();

    printf("Number of blocks: %d\n", num_blocks);

    if (blocks_with_bad_ids > 0)
    {
	printf("WARNING: %d blocks had a reference ID of zero and weren't written!\n", blocks_with_bad_ids);
    }

    md5_finish_ctx(&md5_context, md5_digest);   /* finish computing the md5 sum */
    sha1_finish_ctx(&sha1_context, sha1_digest);

    printf("MD5: ");
    for (i = 0; i < sizeof(md5_digest); i++) printf("%02x", md5_digest[i]);
    printf("\n");

    printf("SHA1: ");
    for (i = 0; i < sizeof(sha1_digest); i++) printf("%02x", sha1_digest[i]);
    printf("\n");

    for (i = 0; i < SIZE_COUNTS; i++)
    {
	if (counts[i] > 0) printf("  %d: %u\n", i, counts[i]);
    }

    if (ofp != NULL && fclose(ofp) != 0)
    {
	perror(argv[1]);
	exit(1);
    }

    close(obj_file_desc);

    return 0;
}

