/*
 *  utfconv.c : Character conversion between UTF-8 and UCS4.
 *              This file is part of the FreeLCD package.
 *
 *  $Id: utfconv.c,v 1.1 2004/01/17 00:04:24 unicorn Exp $
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; either version 2 of the License, or (at your
 *  option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *  Copyright (c) 2003, Jeroen van den Berg <unicorn@hippie.nu>
 */

#include "utfconv.h"

/*
 * Decode the UTF-8 sequence that starts at _utf8 into a single UCS4
 * character.
 *
 * The original (pre-RFC 3629) encoding is accepted, so sequences may be
 * up to six bytes long and encode values up to 0x7FFFFFFF.
 *
 *   _utf8  Pointer to the first byte of the sequence.  Bytes are read one
 *          at a time and decoding stops at the first byte that is not a
 *          continuation byte, so a terminating NUL inside a truncated
 *          sequence is never read past.
 *   wc     Receives the decoded character.  It is left untouched when the
 *          sequence is invalid.
 *
 * Returns the number of bytes consumed (1 - 6), or 0 if the sequence is
 * invalid: a stray continuation byte, the bytes 0xFE / 0xFF, a missing
 * continuation byte, or an overlong encoding (a value that would fit in a
 * shorter sequence).
 */
size_t
utf8_to_wc (const char *_utf8, ucs4_t *wc)
{
  const unsigned char *utf8 = (const unsigned char *) _utf8;
  unsigned char lead = utf8[0];
  ucs4_t curr;
  ucs4_t min;   /* Smallest value that really needs 'len' bytes. */
  size_t len;
  size_t i;

  if (lead < 0x80)
    {
      /* Range 0 - 7F */
      *wc = lead;
      return 1;
    }
  else if (lead < 0xc0)
    {
      /* A continuation byte can not start a sequence. */
      return 0;
    }
  else if (lead < 0xe0)
    {
      /* Range 80 - 7FF */
      len = 2;
      min = 0x80;
      curr = lead & 0x1f;
    }
  else if (lead < 0xf0)
    {
      /* Range 800 - FFFF */
      len = 3;
      min = 0x800;
      curr = lead & 0x0f;
    }
  else if (lead < 0xf8)
    {
      /* Range 10000 - 1FFFFF */
      len = 4;
      min = 0x10000;
      curr = lead & 0x07;
    }
  else if (lead < 0xfc)
    {
      /* Range 200000 - 3FFFFFF */
      len = 5;
      min = 0x200000;
      curr = lead & 0x03;
    }
  else if (lead < 0xfe)
    {
      /* Range 4000000 - 7FFFFFFF */
      len = 6;
      min = 0x4000000;
      curr = lead & 0x01;
    }
  else
    {
      /* 0xFE and 0xFF never appear in UTF-8. */
      return 0;
    }

  for (i = 1; i < len; i++)
    {
      unsigned char next = utf8[i];

      /* Every trailing byte must look like 10xxxxxx.  Note the
         parentheses: '!=' binds tighter than '&'. */
      if ((next & 0xc0) != 0x80)
        return 0;

      curr = (curr << 6) | (next & 0x3f);
    }

  /* Reject overlong forms such as C0 80 or E0 80 80. */
  if (curr < min)
    return 0;

  *wc = curr;
  return len;
}


/*
 * Encode the UCS4 character wc as a UTF-8 sequence and store it in utf8.
 *
 * Sequences of up to six bytes are produced, covering values up to
 * 0x7FFFFFFF.  No terminating NUL is written; the caller must supply a
 * buffer large enough for the sequence.
 *
 * Returns the number of bytes written (1 - 6), or 0 when wc is larger
 * than 0x7FFFFFFF, in which case nothing is written.
 */
size_t
wc_to_utf8 (ucs4_t wc, char *utf8)
{
  /* Length marker for the first byte, indexed by sequence length. */
  static const unsigned char lead_marker[] =
    { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
  size_t len;
  size_t pos;

  if (wc < 0x80)
    {
      /* Single byte: stored as is. */
      utf8[0] = wc;
      return 1;
    }

  if (wc < 0x800)
    len = 2;
  else if (wc < 0x10000)
    len = 3;
  else if (wc < 0x200000)
    len = 4;
  else if (wc < 0x4000000)
    len = 5;
  else if (wc <= 0x7fffffff)
    len = 6;
  else
    return 0;

  /* Fill in the continuation bytes from the last one backwards, taking
     six payload bits at a time. */
  for (pos = len - 1; pos > 0; pos--)
    {
      utf8[pos] = 0x80 | (wc & 0x3f);
      wc >>= 6;
    }

  /* Whatever is left fits in the first byte next to the length marker. */
  utf8[0] = lead_marker[len] | wc;

  return len;
}

