/* Viterbi decoder for K=7 rate=1/2 convolutional code
 * continuous traceback version
 * Copyright 1996 Phil Karn, KA9Q
 *
 * -- 
 * Modified slightly for use in MFSK16 decoder by
 * Tomi Manninen, OH2BNS
 * 
 * Note that my mods probably kill the ultimate performance of the
 * decoder but MFSK doesn't need that anyway... :-)
 * -- 
 *
 * This version of the Viterbi decoder reads a continuous stream of
 * 8-bit soft decision samples from standard input in offset-binary
 * form, i.e., a 255 sample is the strongest possible "1" symbol and a
 * 0 is the strongest possible "0" symbol. 128 is an erasure (unknown).
 *
 * The decoded output is written to stdout in big-endian form (the first
 * decoded bit appears in the high order bit of the first output byte).
 *
 * The metric table is fixed, and no attempt is made (yet) to find proper
 * symbol synchronization. These are likely future enhancements.
 */
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include "viterbi27.h"
#include "viterbi.h"

/* This parameter sizes the path memory in bits, which is organized as a
 * circular buffer through which we periodically "trace back" to
 * produce the decoded data. PATHMEM must be greater than
 * MERGEDIST+TRACECHUNK, and for efficiency it should also be a power of 2.
 * Don't make it *too* large, or it will spill out of the CPU's on-chip cache
 * and decrease performance. Each bit of path memory costs 8 bytes for the
 * K=7 code.
 */
// #define PATHMEM		128

/* In theory, a Viterbi decoder is true maximum likelihood only if
 * the path memory is as long as the entire message and a single traceback
 * is made from the terminal state (usually zero) after the entire message
 * is received.
 *
 * In practice, performance is essentially optimum as long as decoding
 * decisions are deferred by at least 4-5 constraint lengths (28-35 bits
 * for K=7) from the most recently received symbols. MERGEDIST sets this
 * parameter. We give ourselves some margin here in case the code is
 * punctured (which slows merging) and also to let us start each traceback
 * from an arbitrary current state instead of taking the time to find the
 * path with the highest current metric.
 */
#define	MERGEDIST	64	/* Distance to trace back before decoding */

/* Since each traceback is costly (thanks to the overhead of having to
 * go back MERGEDIST bits before we produce our first decoded bit) we'd like
 * to decode as many bits as possible per traceback at the expense of
 * increased decoding delay. TRACECHUNK sets how many bits to
 * decode on each traceback. Since output is produced in 8-bit bytes,
 * TRACECHUNK MUST be a multiple of 8.
 */
// #define	TRACECHUNK	8	/* How many bits to decode on each traceback */

/* The path metrics need to be periodically adjusted downward
 * to prevent an integer overflow that could cause the signed comparisons
 * in the butterfly macros to fail.
 *
 * It's possible to code the comparisons to work in modulo fashion, e.g.,
 * as 'if((a-b) > 0)' rather than 'if(a >b)'. A good optimizer would generate
 * code like 'cmp a,b;js foo' for this, but GCC doesn't.
 *
 * This constant should be larger than the maximum path metric spread.
 * Experimentally this seems to be 2040, which is probably related to the
 * free distance of the code (10) and the symbol metric scale (0-255).
 */
#define	RENORMALIZE	10000

/* Compile-time sanity checks on the decoder geometry.
 * PATHMEM and TRACECHUNK are not defined in this file (the local
 * definitions are commented out), so they are presumably supplied by one
 * of the included project headers -- TODO confirm.
 *
 * The path memory has to hold at least the merge distance plus one
 * chunk of decoded bits, otherwise the build is aborted.
 */
#if (TRACECHUNK + MERGEDIST > PATHMEM)
#error "TRACECHUNK + MERGEDIST > PATHMEM"
#endif

/* Decoded output is delivered in whole bytes, so the number of bits
 * decoded per traceback must be a multiple of 8.
 */
#if ((TRACECHUNK % 8) != 0)
#error "TRACECHUNK not multiple of 8"
#endif

static void traceback(struct viterbi *v);

/*
 * Allocate and initialise a decoder instance.
 *
 * Returns a zero-filled, heap-allocated struct viterbi with the branch
 * metric table and the starting path metrics filled in, or NULL when the
 * allocation fails. The caller releases it with clear_viterbi27().
 */
struct viterbi *init_viterbi27(void)
{
	struct viterbi *v = calloc(1, sizeof *v);
	int sym, state;

	if (v == NULL)
		return NULL;

	/* Branch metric table (make this an option).
	 * Symbols are offset-binary: 0 is the strongest possible '0',
	 * 255 the strongest possible '1' and 128 an erasure. Row 0 scores
	 * a symbol against a transmitted '0' (128 down to -127), row 1
	 * against a transmitted '1' (-128 up to 127).
	 */
	for (sym = 0; sym < 256; sym++) {
		v->mettab[0][sym] = 128 - sym;
		v->mettab[1][sym] = sym - 128;
	}

	/* State 0 starts with metric 0; every other state gets a large
	 * negative metric.
	 */
	v->cmetric[0] = 0;
	for (state = 63; state > 0; state--)
		v->cmetric[state] = -99999;

	return v;
}

/*
 * Release a decoder instance obtained from init_viterbi27().
 *
 * The whole decoder state lives in the single block allocated there, so
 * one free() is sufficient. Passing NULL is harmless because free(NULL)
 * is a no-op. The pointer must not be used after this call.
 */
void clear_viterbi27(struct viterbi *v)
{
	free(v);
}

/*
 * 8-bit parity lookup table, generated by partab.c
 *
 * Partab[n] is 1 when the byte value n has an odd number of set bits and
 * 0 otherwise. The table is read-only, so it is declared const with an
 * explicit size to let the compiler reject accidental writes and to catch
 * a truncated initialiser list.
 */
static const unsigned char Partab[256] = {
 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
};

/*
 * Convolutional encoder.
 *
 * Shifts one data bit (any non-zero `data` counts as 1) into the low end
 * of the shift register at *encstate and stores the new register value
 * back. Returns the resulting symbol pair as a 2-bit value: bit 1 is the
 * parity of the register masked with POLYA, bit 0 the parity of the
 * register masked with POLYB.
 */
int encode27(unsigned int *encstate, int data)
{
	unsigned int state = *encstate << 1;

	if (data)
		state |= 1;
	*encstate = state;

	return (Partab[state & POLYA] << 1) | Partab[state & POLYB];
}

/*
 * Main loop -- read input symbols and run ACS butterflies,
 * periodically tracing back to produce decoded output data.
 * The loop is unrolled to process two bits per iteration.
 *
 * v       - decoder state; cmetric[], paths[], pi, first and datalen are
 *           read and/or written directly here.
 * symbols - four soft-decision symbols, i.e. two symbol pairs:
 *           symbols[0..1] belong to the first bit, symbols[2..3] to the
 *           second. They are used as indices into v->mettab[][].
 *
 * v->datalen is reset to 0 on every call and only becomes non-zero when
 * traceback() runs during this call, so a non-zero datalen after return
 * means fresh decoded bytes are available in v->data.
 *
 * BUTTERFLY and BUTTERFLY2 are macros defined outside this file. They
 * presumably refer to the locals `dec` and `mets` (and to `v`) by name,
 * so those identifiers must not be renamed -- TODO confirm against the
 * header.
 */
void viterbi27(struct viterbi *v, unsigned char symbols[4])
{
	register unsigned long dec;
	int mets[4];
	int i;

	/* Renormalize metrics to prevent overflow.
	 * Only cmetric[0] is inspected; when it comes within RENORMALIZE
	 * of either end of the long range, all 64 metrics are shifted by
	 * LONG_MAX in the opposite direction.
	 */
	if (v->cmetric[0] > (LONG_MAX - RENORMALIZE)) {
		for (i = 0; i < 64; i++)
			v->cmetric[i] -= LONG_MAX;
	} else if (v->cmetric[0] < LONG_MIN + RENORMALIZE) {
		for (i = 0; i < 64; i++)
			v->cmetric[i] += LONG_MAX;
	}

	/* Branch metrics for the first symbol pair. The index of mets[]
	 * is the hypothesised symbol pair: bit 1 selects the mettab row
	 * used for symbols[0], bit 0 the row used for symbols[1].
	 */
	mets[0] = v->mettab[0][symbols[0]] + v->mettab[0][symbols[1]];
	mets[1] = v->mettab[0][symbols[0]] + v->mettab[1][symbols[1]];
	mets[3] = v->mettab[1][symbols[0]] + v->mettab[1][symbols[1]];
	mets[2] = v->mettab[1][symbols[0]] + v->mettab[0][symbols[1]];

	/* On even numbered bits, the butterflies read from cmetrics[]
	 * and write to nmetrics[]. On odd numbered bits, the reverse
	 * is done
	 */
	/* First half of the butterflies for this bit; the decision bits
	 * collected in dec are stored in the even word of the path memory
	 * slot.
	 */
	dec = 0;
	BUTTERFLY(0,  0);
	BUTTERFLY(6,  0);
	BUTTERFLY(8,  0);
	BUTTERFLY(14, 0);
	BUTTERFLY(2,  3);
	BUTTERFLY(4,  3);
	BUTTERFLY(10, 3);
	BUTTERFLY(12, 3);
	BUTTERFLY(1,  1);
	BUTTERFLY(7,  1);
	BUTTERFLY(9,  1);
	BUTTERFLY(15, 1);
	BUTTERFLY(3,  2);
	BUTTERFLY(5,  2);
	BUTTERFLY(11, 2);
	BUTTERFLY(13, 2);
	v->paths[2 * v->pi] = dec;
	/* Second half (butterflies 16..31) goes into the odd word */
	dec = 0;
	BUTTERFLY(19, 0);
	BUTTERFLY(21, 0);
	BUTTERFLY(27, 0);
	BUTTERFLY(29, 0);
	BUTTERFLY(17, 3);
	BUTTERFLY(23, 3);
	BUTTERFLY(25, 3);
	BUTTERFLY(31, 3);
	BUTTERFLY(18, 1);
	BUTTERFLY(20, 1);
	BUTTERFLY(26, 1);
	BUTTERFLY(28, 1);
	BUTTERFLY(16, 2);
	BUTTERFLY(22, 2);
	BUTTERFLY(24, 2);
	BUTTERFLY(30, 2);
	v->paths[2 * v->pi + 1] = dec;
	/* No wrap needed here: the modulo is applied only after the second
	 * bit below. This presumes pi is even on entry and PATHMEM is
	 * even -- TODO confirm.
	 */
	v->pi++;

	/* Branch metrics for the second symbol pair, same layout as above */
	mets[0] = v->mettab[0][symbols[2]] + v->mettab[0][symbols[3]];
	mets[1] = v->mettab[0][symbols[2]] + v->mettab[1][symbols[3]];
	mets[3] = v->mettab[1][symbols[2]] + v->mettab[1][symbols[3]];
	mets[2] = v->mettab[1][symbols[2]] + v->mettab[0][symbols[3]];

	/* Second bit: same butterfly sequence using the BUTTERFLY2 variant */
	dec = 0;
	BUTTERFLY2(0,  0);
	BUTTERFLY2(6,  0);
	BUTTERFLY2(8,  0);
	BUTTERFLY2(14, 0);
	BUTTERFLY2(2,  3);
	BUTTERFLY2(4,  3);
	BUTTERFLY2(10, 3);
	BUTTERFLY2(12, 3);
	BUTTERFLY2(1,  1);
	BUTTERFLY2(7,  1);
	BUTTERFLY2(9,  1);
	BUTTERFLY2(15, 1);
	BUTTERFLY2(3,  2);
	BUTTERFLY2(5,  2);
	BUTTERFLY2(11, 2);
	BUTTERFLY2(13, 2);
	v->paths[2 * v->pi] = dec;
	dec = 0;
	BUTTERFLY2(19, 0);
	BUTTERFLY2(21, 0);
	BUTTERFLY2(27, 0);
	BUTTERFLY2(29, 0);
	BUTTERFLY2(17, 3);
	BUTTERFLY2(23, 3);
	BUTTERFLY2(25, 3);
	BUTTERFLY2(31, 3);
	BUTTERFLY2(18, 1);
	BUTTERFLY2(20, 1);
	BUTTERFLY2(26, 1);
	BUTTERFLY2(28, 1);
	BUTTERFLY2(16, 2);
	BUTTERFLY2(22, 2);
	BUTTERFLY2(24, 2);
	BUTTERFLY2(30, 2);
	v->paths[2 * v->pi + 1] = dec;
	/* Advance the circular path memory index, wrapping at PATHMEM */
	v->pi = (v->pi + 1) % PATHMEM;

	/* Nothing decoded yet in this call; traceback() overrides this */
	v->datalen = 0;

	/* Trace back once every TRACECHUNK bits.
	 * NOTE(review): init_viterbi27() in this file leaves v->first at
	 * the zero calloc() gave it, so unless something else sets it the
	 * guard below never skips a traceback -- confirm intent.
	 */
	if ((v->pi % TRACECHUNK) == 0) {
		if (!v->first)
			traceback(v);
		v->first = 0;
	}

	return;
}

/*
 * Periodic traceback to produce decoded data.
 *
 * Walks the circular path memory backwards, starting at the entry written
 * most recently. The first MERGEDIST - 6 steps only follow the stored
 * decisions; the following 8 * sizeof(v->data) steps are collected as
 * decoded bits.
 *
 * Because the walk runs backwards in time, v->data is filled from its last
 * byte to its first and, within a byte, from bit 0 upwards. The earliest
 * decoded bit therefore ends up in the high order bit of v->data[0].
 *
 * On return:
 *   v->metric  = current path metric of the state reached after the merge
 *                walk
 *   v->data    = decoded bytes
 *   v->datalen = TRACECHUNK / 8
 *
 * NOTE(review): the byte loop is sized by sizeof(v->data) while datalen is
 * set from TRACECHUNK; this presumes v->data is an array of TRACECHUNK / 8
 * bytes -- confirm against the struct declaration.
 */
static void traceback(struct viterbi *v)
{
	unsigned int pi = v->pi;
	int beststate, i, j;

	/* Start on an arbitrary path and trace it back until it's almost
	 * certain we've merged onto the best path
	 */
	beststate = 0;				/* arbitrary */

	/* Undo last increment of pi. PATHMEM is added before subtracting so
	 * the unsigned value never wraps below zero; the step is then
	 * correct even when PATHMEM is not a power of two.
	 */
	pi = (pi + PATHMEM - 1) % PATHMEM;
	for (i = 0; i < MERGEDIST - 6; i++) {
		/* Each path memory slot is two words: bit 5 of the state
		 * picks the word, the low five bits pick the decision bit.
		 * The mask is built from 1UL because shifting a plain int
		 * 1 by 31 is undefined behaviour.
		 */
		if (v->paths[2 * pi + (beststate >> 5)] &
		    (1UL << (beststate & 31))) {
			beststate |= 64;	/* 2^(K-1) */
		}
		beststate >>= 1;
		pi = (pi + PATHMEM - 1) % PATHMEM;
	}

	v->metric = v->cmetric[beststate];

	/* beststate is now the encoder state on the best path, MERGEDIST
	 * bits back. We continue to chain back until we accumulate
	 * TRACECHUNK bits of decoded data
	 */
	for (j = (int) sizeof(v->data) - 1; j >= 0; j--) {
		v->data[j] = 0;
		for (i = 0; i < 8; i++) {
			if (v->paths[2 * pi + (beststate >> 5)] &
			    (1UL << (beststate & 31))) {
				beststate |= 64;	/* 2^(K-1) */
				v->data[j] |= 1 << i;
			}
			beststate >>= 1;
			pi = (pi + PATHMEM - 1) % PATHMEM;
		}
	}

	v->datalen = TRACECHUNK / 8;
}

