/************************************************************************/
/*									*/
/*  Translation between unicode values and utf-8.			*/
/*									*/
/************************************************************************/

#   include	<stdlib.h>
#   include	<stdio.h>
#   include	<string.h>

#   include	<ucd.h>
#   include	<uniUtf8.h>

#   include	<appDebugon.h>

/************************************************************************/
/*									*/
/*  UTF-8 Calculations.							*/
/*									*/
/*  Combine the payload bits of the bytes of a 1, 2, 3 or 4 byte	*/
/*  UTF-8 sequence into one value. The lead byte contributes its	*/
/*  low 5, 4 or 3 bits; each subsequent byte contributes its low	*/
/*  6 bits. The bytes themselves are not validated here.		*/
/*									*/
/************************************************************************/

# define UTF8_1(a)		(a)
# define UTF8_2(a,b)		( ( ((a)&0x1f) <<  6 ) | \
				  ( ((b)&0x3f)       ) )
# define UTF8_3(a,b,c)		( ( ((a)&0x0f) << 12 ) | \
				  ( ((b)&0x3f) <<  6 ) | \
				  ( ((c)&0x3f)       ) )
# define UTF8_4(a,b,c,d)	( ( ((a)&0x07) << 18 ) | \
				  ( ((b)&0x3f) << 12 ) | \
				  ( ((c)&0x3f) <<  6 ) | \
				  ( ((d)&0x3f)       ) )

/************************************************************************/
/*									*/
/*  Retrieve one character from a string.				*/
/*									*/
/*  buf holds NUL terminated UTF-8. The value of the first character	*/
/*  is stored in *pSymbol and the number of bytes that it occupies	*/
/*  (1, 2 or 3) is returned.						*/
/*									*/
/*  0 is returned, and *pSymbol is left alone, at the terminating	*/
/*  NUL, for a malformed sequence, and for a 4 byte sequence: Its	*/
/*  value does not fit in a 16 bit integer.				*/
/*									*/
/************************************************************************/

int uni_GetUtf8(	unsigned short *	pSymbol,
			const unsigned char *	buf )
    {
    if  ( ! buf[0] )
	{ return 0;	}

    if  ( buf[0] < 128 )
	{ *pSymbol= UTF8_1(buf[0]); return 1; }

    /*  0x80..0xbf only continue a sequence: They cannot start one.	*/
    /*  buf[0] is not NUL, so buf[1] can be read for the message.	*/
    if  ( buf[0] < 0xc0 )
	{ XXDEB(buf[0],buf[1]); return 0;	}

    /*  Every following byte must look like 10xxxxxx. As a NUL does	*/
    /*  not, the checks also keep us inside the string.		*/
    if  ( ( buf[1] & 0xc0 ) != 0x80 )
	{ XXDEB(buf[0],buf[1]); return 0;	}
    if  ( buf[0] < 224 )
	{ *pSymbol= UTF8_2(buf[0],buf[1]); return 2; }

    if  ( ( buf[2] & 0xc0 ) != 0x80 )
	{ XXDEB(buf[0],buf[2]); return 0;	}
    if  ( buf[0] < 240 )
	{ *pSymbol= UTF8_3(buf[0],buf[1],buf[2]); return 3; }

    if  ( ( buf[3] & 0xc0 ) != 0x80 )
	{ XXDEB(buf[0],buf[3]); return 0;	}

    /*  A 4 byte sequence: The value does not fit in the 16 bit	*/
    /*  *pSymbol. Report the value and refuse to translate it.		*/
    LLDEB(buf[3],UTF8_4(buf[0],buf[1],buf[2],buf[3]));
    return 0;
    }

/************************************************************************/
/*									*/
/*  Emit one character to a string.					*/
/*									*/
/*  The UTF-8 encoding of symbol is stored in buf. Nothing is stored	*/
/*  after it: No terminating NUL is added.				*/
/*									*/
/*  Return the number of bytes stored (1..4). Return 0 without		*/
/*  storing anything if symbol is 0, negative or 0x200000 or more.	*/
/*									*/
/************************************************************************/

int uni_PutUtf8(	unsigned char *		buf,
			int			symbol )
    {
    /*  NUL is not emitted. That is not reported.  */
    if  ( symbol == 0 )
	{ return 0;	}

    /*  Values that cannot be stored in at most four bytes.  */
    if  ( symbol < 0 || symbol >= 0x200000 )
	{ LDEB(symbol); return 0;	}

    /*  0xxxxxxx  */
    if  ( symbol < 0x80 )
	{
	buf[0]= symbol;
	return 1;
	}

    /*  110xxxxx 10xxxxxx  */
    if  ( symbol < 0x800 )
	{
	buf[0]= 0xc0 | ( symbol >> 6 );
	buf[1]= 0x80 | ( symbol & 0x3f );
	return 2;
	}

    /*  1110xxxx 10xxxxxx 10xxxxxx  */
    if  ( symbol < 0x10000 )
	{
	buf[0]= 0xe0 | ( symbol >> 12 );
	buf[1]= 0x80 | ( ( symbol >> 6 ) & 0x3f );
	buf[2]= 0x80 | ( symbol & 0x3f );
	return 3;
	}

    /*  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx  */
    buf[0]= 0xf0 | ( ( symbol >> 18 ) & 0x07 );
    buf[1]= 0x80 | ( ( symbol >> 12 ) & 0x3f );
    buf[2]= 0x80 | ( ( symbol >> 6 ) & 0x3f );
    buf[3]= 0x80 | ( symbol & 0x3f );
    return 4;
    }

/************************************************************************/
/*									*/
/*  Translate an utf8 string to 16 bit unicode values.			*/
/*									*/
/*  word is a NUL terminated UTF-8 string. The values are returned	*/
/*  in an array from malloc() that the caller has to free(). The	*/
/*  number of values goes to *pUlen. An extra 0 follows the values.	*/
/*									*/
/*  Translation stops at the first sequence that uniGetUtf8() does	*/
/*  not translate: What precedes it is returned.			*/
/*									*/
/*  NULL is returned, and *pUlen is not set, if malloc() fails.		*/
/*									*/
/************************************************************************/

unsigned short * uniUtf8ToUnicodes(	int *			pUlen,
					const unsigned char *	word )
    {
    int			l= strlen( (const char *)word );
    unsigned short *	ucods;
    int			fr= 0;
    int			ulen= 0;

    /*  Every value takes at least one byte, so l values is the most	*/
    /*  that can result. The extra element holds the terminating 0	*/
    /*  and avoids malloc( 0 ): That may return NULL for an empty	*/
    /*  word, which would be mistaken for a failure.			*/
    ucods= malloc( ( l+ 1 )* sizeof(unsigned short) );
    if  ( ! ucods )
	{ LXDEB(l,ucods); return (unsigned short *)0;	}

    while( fr < l )
	{
	int	step= uniGetUtf8( ucods+ ulen, word+ fr );

	if  ( step < 1 )
	    { break;	}

	ulen++; fr += step;
	}

    ucods[ulen]= 0;

    *pUlen= ulen;
    return ucods;
    }

/************************************************************************/
/*									*/
/*  Copy an UTF-8 string while translating the characters that		*/
/*  ucdIsLl() accepts with ucdToUpper().				*/
/*									*/
/*  to receives the translation, followed by a '\0'. It is up to	*/
/*  the caller to make it large enough. from holds len bytes of		*/
/*  input.								*/
/*									*/
/*  If segments is given, it has room for segmentCount ints. It		*/
/*  receives pairs of byte counts in the output: First that of a	*/
/*  stretch that was copied as it is, then that of the stretch of	*/
/*  translated characters that follows it. Either can be 0.		*/
/*									*/
/*  Return the number of pairs, or -1 on failure.			*/
/*									*/
/************************************************************************/

static int uniShiftUtf8StringUp( char *			to,
				int *			segments,
				int			segmentCount,
				const char *		from,
				int			len )
    {
    int		n= 0;
    int		seg= 0;

    while( n < len )
	{
	unsigned short		symbol;
	int			n0= 0;
	int			n1= 0;
	int			df;
	int			dt;

	/*  Both ints of the pair must fit: The last is at 2*seg+ 1.	*/
	if  ( segments && 2* seg+ 1 >= segmentCount )
	    { LLDEB(seg,segmentCount); return -1;	}

	/*  The stretch that is copied as it is.  */
	while( n < len )
	    {
	    df= uniGetUtf8( &symbol, (unsigned char *)from );
	    if  ( df < 1 )
		{ LDEB(df); return -1;	}
	    if  (   ucdIsLl( symbol ) )
		{ break;	}

	    dt= uniPutUtf8( (unsigned char *)to, symbol );
	    n0 += dt; from += df; to += dt; n += df;
	    }

	/*  The stretch that is translated.  */
	while( n < len )
	    {
	    df= uniGetUtf8( &symbol, (unsigned char *)from );
	    if  ( df < 1 )
		{ LDEB(df); return -1;	}
	    if  ( ! ucdIsLl( symbol ) )
		{ break;	}

	    symbol= ucdToUpper( symbol );

	    dt= uniPutUtf8( (unsigned char *)to, symbol );
	    n1 += dt; from += df; to += dt; n += df;
	    }

	if  ( segments )
	    {
	    segments[2*seg+ 0]= n0;
	    segments[2*seg+ 1]= n1;
	    }

	seg++;
	}

    *to= '\0';
    return seg;
    }

/************************************************************************/
/*									*/
/*  Shift a string to different case.					*/
/*									*/
/*  Only SHIFT_UPPER is implemented: Any other value of how fails.	*/
/*									*/
/*  If the caller asks for segments, the even segments receive the	*/
/*  (byte)lengths of the stretches that already were in the desired	*/
/*  case. The odd segments receive the (byte)lengths of the stretches	*/
/*  that were not in the desired case.					*/
/*  Segments are only made if both pSegments and pSegmentCount are set.	*/
/*									*/
/*  On success, 0 is returned and *pShifted receives the shifted	*/
/*  string in memory from malloc(): The caller owns it, just like	*/
/*  the segments. *pSegmentCount receives the number of pairs.		*/
/*									*/
/*  On failure, -1 is returned and the caller's variables are not	*/
/*  touched. A string for which no segments are found, such as an	*/
/*  empty one, counts as a failure.					*/
/*									*/
/************************************************************************/

int uniShiftUtf8String(	char **			pShifted,
			int **			pSegments,
			int *			pSegmentCount,
			int			how,
			const char *		sourceString,
			int			len )
    {
    /*  Assume failure until everything has been handed over.  */
    int			rval= -1;

    char *		shiftedString= (char *)0;
    int	*		segments= (int *)0;
    int			segmentCount= 0;

    if  ( how != SHIFT_UPPER )
	{ LDEB(how); goto ready;	}

    shiftedString= malloc( 2* len+ 1 );
    if  ( ! shiftedString )
	{ XDEB(shiftedString); goto ready;	}

    if  ( pSegments && pSegmentCount )
	{
	segments= malloc( (len+ 2)* sizeof(int) );
	if  ( ! segments )
	    { XDEB(segments); goto ready;	}
	}

    segmentCount= uniShiftUtf8StringUp( shiftedString, segments, len+ 2,
							sourceString, len );
    if  ( segmentCount < 1 )
	{ LDEB(segmentCount); goto ready;	}

    /*  Hand over to the caller. Clearing our own pointers keeps the	*/
    /*  cleanup below from freeing what the caller now owns.		*/
    *pShifted= shiftedString; shiftedString= (char *)0;

    if  ( pSegments )
	{ *pSegments= segments; segments= (int *)0;	}
    if  ( pSegmentCount )
	{ *pSegmentCount= segmentCount;	}

    rval= 0;

  ready:

    /*  free() ignores null pointers.  */
    free( shiftedString );
    free( segments );

    return rval;
    }
