# ==============================================================================
# Extract Unicode mappings from ISO 2022-JP data file
#
# Copyright (c) 2014 by the developers. See the LICENSE file for details.
#
# Attention:
# This program does not work with newer mapping tables that contain JIS codepoints
# that are mapped to Unicode codepoint sequences!


# ==============================================================================
# Represent extracted data as initialized C array

BEGIN \
{
   # Emit a C comment naming the generator, followed by the declaration line
   # and the opening brace of the array initializer
   printf("%s\n%s\n%s\n",
          "/* ISO 2022-JP to Unicode mappings created by build1.mk */",
          "static const struct iso2022_jp  iso2022_jp_table[] =",
          "{")
}

END \
{
   # Append the sentinel entry that marks the end of the table
   printf("%s\n", "   /* Codepoint -1 is the end marker */")
   printf("%s\n", "   { -1L, -1L }")
   # Close the array initializer; two empty lines separate it from the trailer
   printf("%s\n\n\n", "};")
   printf("%s\n", "/* EOF */")
}


# ==============================================================================
# Ignore comment lines

$0 ~ /^#/ \
{
   # A record starting with '#' is a comment: drop it before the processing
   # rule below gets to see it
   next
}


# ==============================================================================
# Process lines

{
   # 1st field carries the JIS X 0208 codepoint, 2nd field the Unicode mapping
   jis = extract_codepoint($1)
   ucs = extract_mapping($2)
   # Nothing is printed unless both fields yielded a codepoint
   if("" == jis || "" == ucs)
   {
      next
   }
   # Format: { JIS X 0208 codepoint, Unicode codepoint }
   printf("   { %s, %s },\n", jis, ucs)
   next
}


# ==============================================================================
# Extract JIS X 0208 codepoint

# Return the hexadecimal digits that follow the first "3-" prefix found in s,
# formatted as a C long literal ("0x" digits "L").
# Return the empty string if s contains no "3-" followed by hex digits.
#
#    s   Text to search (the processing rule passes the 1st input field)
#    cp  Local variable, callers must not pass it
function extract_codepoint(s,    cp) \
{
   # Listing cp as an extra parameter makes it local to this function.
   # Otherwise awk would create a global shared with all rules and functions.
   cp = ""
   # The pattern requires at least one hex digit behind "3-", therefore a match
   # is always longer than the 2 character prefix stripped below.
   # An empty s cannot match and yields the empty string.
   if(match(s, /3-[0-9A-F]+/))
   {
      cp = "0x" substr(s, RSTART + 2, RLENGTH - 2) "L"
   }
   return(cp)
}


# ==============================================================================
# Extract Unicode codepoint

# Return the hexadecimal digits that follow the first "U+" prefix found in s,
# formatted as a C long literal ("0x" digits "L").
# Return the empty string if s contains no "U+" followed by hex digits.
#
# Note: The match stops at the first character that is not a hex digit. For a
# codepoint sequence like "U+304B+309A" only the first codepoint is returned.
#
#    s   Text to search (the processing rule passes the 2nd input field)
#    cp  Local variable, callers must not pass it
function extract_mapping(s,    cp) \
{
   # Listing cp as an extra parameter makes it local to this function.
   # Otherwise awk would create a global shared with all rules and functions.
   cp = ""
   # The pattern requires at least one hex digit behind "U+", therefore a match
   # is always longer than the 2 character prefix stripped below.
   # An empty s cannot match and yields the empty string.
   if(match(s, /U[+][0-9A-F]+/))
   {
      cp = "0x" substr(s, RSTART + 2, RLENGTH - 2) "L"
   }
   return(cp)
}


# EOF
