#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <getopt.h>
#include <err.h>
#include <errno.h>
#include <string.h>
#include <iconv.h>
#include <fts.h>
#include <limits.h>
#include <stdint.h>
#include <libgen.h>
#include <sys/stat.h>

/*
 * Twice PATH_MAX.
 * NOTE(review): presumably the size in bytes of a buffer holding a
 * PATH_MAX-long path encoded as UCS-2 (2 bytes per character) --
 * confirm at the use sites.
 */
#define UCS2_PATH_MAX (PATH_MAX * 2)

/*
 * One transliteration rule: a 16-bit code point and the text that
 * replaces it.  The field order matters: the tables in this file use
 * positional initializers ({ codepoint, replace }).
 */
struct translit {
	uint16_t codepoint;	/* code point to match (16 bits wide) */
	char *replace;		/* replacement string, e.g. "-" or "(tm)" */
};

/*
 * ASCII transliteration table for U+0000 .. U+024F, indexed directly by
 * code point (entry N is the replacement for U+N; 0x250 entries in total).
 *
 * Each entry is the ASCII text substituted for the character.  An empty
 * string means the table offers no replacement for that character
 * (control characters, SOLIDUS, letters without an obvious ASCII look-alike);
 * presumably such characters are dropped from the output -- confirm against
 * the lookup code.
 *
 * Every replacement must agree with the character named in its comment:
 * same base letter and same case.
 */
char *translit_low[] = {
  /* U+0000 */  "",    /* NULL */
  /* U+0001 */  "",    /* START OF HEADING */
  /* U+0002 */  "",    /* START OF TEXT */
  /* U+0003 */  "",    /* END OF TEXT */
  /* U+0004 */  "",    /* END OF TRANSMISSION */
  /* U+0005 */  "",    /* ENQUIRY */
  /* U+0006 */  "",    /* ACKNOWLEDGE */
  /* U+0007 */  "",    /* BELL */
  /* U+0008 */  "",    /* BACKSPACE */
  /* U+0009 */  "",    /* CHARACTER TABULATION */
  /* U+000A */  "",    /* LINE FEED */
  /* U+000B */  "",    /* LINE TABULATION */
  /* U+000C */  "",    /* FORM FEED */
  /* U+000D */  "",    /* CARRIAGE RETURN */
  /* U+000E */  "",    /* SHIFT OUT */
  /* U+000F */  "",    /* SHIFT IN */
  /* U+0010 */  "",    /* DATA LINK ESCAPE */
  /* U+0011 */  "",    /* DEVICE CONTROL ONE */
  /* U+0012 */  "",    /* DEVICE CONTROL TWO */
  /* U+0013 */  "",    /* DEVICE CONTROL THREE */
  /* U+0014 */  "",    /* DEVICE CONTROL FOUR */
  /* U+0015 */  "",    /* NEGATIVE ACKNOWLEDGE */
  /* U+0016 */  "",    /* SYNCHRONOUS IDLE */
  /* U+0017 */  "",    /* END OF TRANSMISSION BLOCK */
  /* U+0018 */  "",    /* CANCEL */
  /* U+0019 */  "",    /* END OF MEDIUM */
  /* U+001A */  "",    /* SUBSTITUTE */
  /* U+001B */  "",    /* ESCAPE */
  /* U+001C */  "",    /* INFORMATION SEPARATOR FOUR */
  /* U+001D */  "",    /* INFORMATION SEPARATOR THREE */
  /* U+001E */  "",    /* INFORMATION SEPARATOR TWO */
  /* U+001F */  "",    /* INFORMATION SEPARATOR ONE */
  /* U+0020 */  " ",   /* SPACE */
  /* U+0021 */  "!",   /* EXCLAMATION MARK */
  /* U+0022 */  "\"",  /* QUOTATION MARK */
  /* U+0023 */  "#",   /* NUMBER SIGN */
  /* U+0024 */  "$",   /* DOLLAR SIGN */
  /* U+0025 */  "%",   /* PERCENT SIGN */
  /* U+0026 */  "&",   /* AMPERSAND */
  /* U+0027 */  "'",   /* APOSTROPHE */
  /* U+0028 */  "(",   /* LEFT PARENTHESIS */
  /* U+0029 */  ")",   /* RIGHT PARENTHESIS */
  /* U+002A */  "*",   /* ASTERISK */
  /* U+002B */  "+",   /* PLUS SIGN */
  /* U+002C */  ",",   /* COMMA */
  /* U+002D */  "-",   /* HYPHEN-MINUS */
  /* U+002E */  ".",   /* FULL STOP */
  /* U+002F */  "",    /* SOLIDUS */
  /* U+0030 */  "0",   /* DIGIT ZERO */
  /* U+0031 */  "1",   /* DIGIT ONE */
  /* U+0032 */  "2",   /* DIGIT TWO */
  /* U+0033 */  "3",   /* DIGIT THREE */
  /* U+0034 */  "4",   /* DIGIT FOUR */
  /* U+0035 */  "5",   /* DIGIT FIVE */
  /* U+0036 */  "6",   /* DIGIT SIX */
  /* U+0037 */  "7",   /* DIGIT SEVEN */
  /* U+0038 */  "8",   /* DIGIT EIGHT */
  /* U+0039 */  "9",   /* DIGIT NINE */
  /* U+003A */  ":",   /* COLON */
  /* U+003B */  ";",   /* SEMICOLON */
  /* U+003C */  "<",   /* LESS-THAN SIGN */
  /* U+003D */  "=",   /* EQUALS SIGN */
  /* U+003E */  ">",   /* GREATER-THAN SIGN */
  /* U+003F */  "?",   /* QUESTION MARK */
  /* U+0040 */  "@",   /* COMMERCIAL AT */
  /* U+0041 */  "A",   /* LATIN CAPITAL LETTER A */
  /* U+0042 */  "B",   /* LATIN CAPITAL LETTER B */
  /* U+0043 */  "C",   /* LATIN CAPITAL LETTER C */
  /* U+0044 */  "D",   /* LATIN CAPITAL LETTER D */
  /* U+0045 */  "E",   /* LATIN CAPITAL LETTER E */
  /* U+0046 */  "F",   /* LATIN CAPITAL LETTER F */
  /* U+0047 */  "G",   /* LATIN CAPITAL LETTER G */
  /* U+0048 */  "H",   /* LATIN CAPITAL LETTER H */
  /* U+0049 */  "I",   /* LATIN CAPITAL LETTER I */
  /* U+004A */  "J",   /* LATIN CAPITAL LETTER J */
  /* U+004B */  "K",   /* LATIN CAPITAL LETTER K */
  /* U+004C */  "L",   /* LATIN CAPITAL LETTER L */
  /* U+004D */  "M",   /* LATIN CAPITAL LETTER M */
  /* U+004E */  "N",   /* LATIN CAPITAL LETTER N */
  /* U+004F */  "O",   /* LATIN CAPITAL LETTER O */
  /* U+0050 */  "P",   /* LATIN CAPITAL LETTER P */
  /* U+0051 */  "Q",   /* LATIN CAPITAL LETTER Q */
  /* U+0052 */  "R",   /* LATIN CAPITAL LETTER R */
  /* U+0053 */  "S",   /* LATIN CAPITAL LETTER S */
  /* U+0054 */  "T",   /* LATIN CAPITAL LETTER T */
  /* U+0055 */  "U",   /* LATIN CAPITAL LETTER U */
  /* U+0056 */  "V",   /* LATIN CAPITAL LETTER V */
  /* U+0057 */  "W",   /* LATIN CAPITAL LETTER W */
  /* U+0058 */  "X",   /* LATIN CAPITAL LETTER X */
  /* U+0059 */  "Y",   /* LATIN CAPITAL LETTER Y */
  /* U+005A */  "Z",   /* LATIN CAPITAL LETTER Z */
  /* U+005B */  "[",   /* LEFT SQUARE BRACKET */
  /* U+005C */  "\\",  /* REVERSE SOLIDUS */
  /* U+005D */  "]",   /* RIGHT SQUARE BRACKET */
  /* U+005E */  "^",   /* CIRCUMFLEX ACCENT */
  /* U+005F */  "_",   /* LOW LINE */
  /* U+0060 */  "`",   /* GRAVE ACCENT */
  /* U+0061 */  "a",   /* LATIN SMALL LETTER A */
  /* U+0062 */  "b",   /* LATIN SMALL LETTER B */
  /* U+0063 */  "c",   /* LATIN SMALL LETTER C */
  /* U+0064 */  "d",   /* LATIN SMALL LETTER D */
  /* U+0065 */  "e",   /* LATIN SMALL LETTER E */
  /* U+0066 */  "f",   /* LATIN SMALL LETTER F */
  /* U+0067 */  "g",   /* LATIN SMALL LETTER G */
  /* U+0068 */  "h",   /* LATIN SMALL LETTER H */
  /* U+0069 */  "i",   /* LATIN SMALL LETTER I */
  /* U+006A */  "j",   /* LATIN SMALL LETTER J */
  /* U+006B */  "k",   /* LATIN SMALL LETTER K */
  /* U+006C */  "l",   /* LATIN SMALL LETTER L */
  /* U+006D */  "m",   /* LATIN SMALL LETTER M */
  /* U+006E */  "n",   /* LATIN SMALL LETTER N */
  /* U+006F */  "o",   /* LATIN SMALL LETTER O */
  /* U+0070 */  "p",   /* LATIN SMALL LETTER P */
  /* U+0071 */  "q",   /* LATIN SMALL LETTER Q */
  /* U+0072 */  "r",   /* LATIN SMALL LETTER R */
  /* U+0073 */  "s",   /* LATIN SMALL LETTER S */
  /* U+0074 */  "t",   /* LATIN SMALL LETTER T */
  /* U+0075 */  "u",   /* LATIN SMALL LETTER U */
  /* U+0076 */  "v",   /* LATIN SMALL LETTER V */
  /* U+0077 */  "w",   /* LATIN SMALL LETTER W */
  /* U+0078 */  "x",   /* LATIN SMALL LETTER X */
  /* U+0079 */  "y",   /* LATIN SMALL LETTER Y */
  /* U+007A */  "z",   /* LATIN SMALL LETTER Z */
  /* U+007B */  "{",   /* LEFT CURLY BRACKET */
  /* U+007C */  "|",   /* VERTICAL LINE */
  /* U+007D */  "}",   /* RIGHT CURLY BRACKET */
  /* U+007E */  "~",   /* TILDE */
  /* U+007F */  "",    /* DELETE */
  /* U+0080 */  "",    /* <control> */
  /* U+0081 */  "",    /* <control> */
  /* U+0082 */  "",    /* BREAK PERMITTED HERE */
  /* U+0083 */  "",    /* NO BREAK HERE */
  /* U+0084 */  "",    /* <control> */
  /* U+0085 */  "",    /* NEXT LINE */
  /* U+0086 */  "",    /* START OF SELECTED AREA */
  /* U+0087 */  "",    /* END OF SELECTED AREA */
  /* U+0088 */  "",    /* CHARACTER TABULATION SET */
  /* U+0089 */  "",    /* CHARACTER TABULATION WITH JUSTIFICATION */
  /* U+008A */  "",    /* LINE TABULATION SET */
  /* U+008B */  "",    /* PARTIAL LINE FORWARD */
  /* U+008C */  "",    /* PARTIAL LINE BACKWARD */
  /* U+008D */  "",    /* REVERSE LINE FEED */
  /* U+008E */  "",    /* SINGLE SHIFT TWO */
  /* U+008F */  "",    /* SINGLE SHIFT THREE */
  /* U+0090 */  "",    /* DEVICE CONTROL STRING */
  /* U+0091 */  "",    /* PRIVATE USE ONE */
  /* U+0092 */  "",    /* PRIVATE USE TWO */
  /* U+0093 */  "",    /* SET TRANSMIT STATE */
  /* U+0094 */  "",    /* CANCEL CHARACTER */
  /* U+0095 */  "",    /* MESSAGE WAITING */
  /* U+0096 */  "",    /* START OF GUARDED AREA */
  /* U+0097 */  "",    /* END OF GUARDED AREA */
  /* U+0098 */  "",    /* START OF STRING */
  /* U+0099 */  "",    /* <control> */
  /* U+009A */  "",    /* SINGLE CHARACTER INTRODUCER */
  /* U+009B */  "",    /* CONTROL SEQUENCE INTRODUCER */
  /* U+009C */  "",    /* STRING TERMINATOR */
  /* U+009D */  "",    /* OPERATING SYSTEM COMMAND */
  /* U+009E */  "",    /* PRIVACY MESSAGE */
  /* U+009F */  "",    /* APPLICATION PROGRAM COMMAND */
  /* U+00A0 */  " ",   /* NO-BREAK SPACE */
  /* U+00A1 */  "!",   /* INVERTED EXCLAMATION MARK */
  /* U+00A2 */  "c",   /* CENT SIGN */
  /* U+00A3 */  "L",   /* POUND SIGN */
  /* U+00A4 */  "",    /* CURRENCY SIGN */
  /* U+00A5 */  "Y",   /* YEN SIGN */
  /* U+00A6 */  "|",   /* BROKEN BAR */
  /* U+00A7 */  "",    /* SECTION SIGN */
  /* U+00A8 */  "",    /* DIAERESIS */
  /* U+00A9 */  "(C)", /* COPYRIGHT SIGN */
  /* U+00AA */  "a",   /* FEMININE ORDINAL INDICATOR */
  /* U+00AB */  "<<",  /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */
  /* U+00AC */  "",    /* NOT SIGN */
  /* U+00AD */  "-",   /* SOFT HYPHEN */
  /* U+00AE */  "(R)", /* REGISTERED SIGN */
  /* U+00AF */  "-",   /* MACRON */
  /* U+00B0 */  "o",   /* DEGREE SIGN */
  /* U+00B1 */  "",    /* PLUS-MINUS SIGN */
  /* U+00B2 */  "2",   /* SUPERSCRIPT TWO */
  /* U+00B3 */  "3",   /* SUPERSCRIPT THREE */
  /* U+00B4 */  "",    /* ACUTE ACCENT */
  /* U+00B5 */  "mu",  /* MICRO SIGN */
  /* U+00B6 */  "",    /* PILCROW SIGN */
  /* U+00B7 */  ".",   /* MIDDLE DOT */
  /* U+00B8 */  "",    /* CEDILLA */
  /* U+00B9 */  "1",   /* SUPERSCRIPT ONE */
  /* U+00BA */  "o",   /* MASCULINE ORDINAL INDICATOR */
  /* U+00BB */  ">>",  /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
  /* U+00BC */  "",    /* VULGAR FRACTION ONE QUARTER */
  /* U+00BD */  "",    /* VULGAR FRACTION ONE HALF */
  /* U+00BE */  "",    /* VULGAR FRACTION THREE QUARTERS */
  /* U+00BF */  "?",   /* INVERTED QUESTION MARK */
  /* U+00C0 */  "A",   /* LATIN CAPITAL LETTER A WITH GRAVE */
  /* U+00C1 */  "A",   /* LATIN CAPITAL LETTER A WITH ACUTE */
  /* U+00C2 */  "A",   /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
  /* U+00C3 */  "A",   /* LATIN CAPITAL LETTER A WITH TILDE */
  /* U+00C4 */  "A",   /* LATIN CAPITAL LETTER A WITH DIAERESIS */
  /* U+00C5 */  "A",   /* LATIN CAPITAL LETTER A WITH RING ABOVE */
  /* U+00C6 */  "AE",  /* LATIN CAPITAL LETTER AE */
  /* U+00C7 */  "C",   /* LATIN CAPITAL LETTER C WITH CEDILLA */
  /* U+00C8 */  "E",   /* LATIN CAPITAL LETTER E WITH GRAVE */
  /* U+00C9 */  "E",   /* LATIN CAPITAL LETTER E WITH ACUTE */
  /* U+00CA */  "E",   /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
  /* U+00CB */  "E",   /* LATIN CAPITAL LETTER E WITH DIAERESIS */
  /* U+00CC */  "I",   /* LATIN CAPITAL LETTER I WITH GRAVE */
  /* U+00CD */  "I",   /* LATIN CAPITAL LETTER I WITH ACUTE */
  /* U+00CE */  "I",   /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
  /* U+00CF */  "I",   /* LATIN CAPITAL LETTER I WITH DIAERESIS */
  /* U+00D0 */  "",    /* LATIN CAPITAL LETTER ETH */
  /* U+00D1 */  "N",   /* LATIN CAPITAL LETTER N WITH TILDE */
  /* U+00D2 */  "O",   /* LATIN CAPITAL LETTER O WITH GRAVE */
  /* U+00D3 */  "O",   /* LATIN CAPITAL LETTER O WITH ACUTE */
  /* U+00D4 */  "O",   /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
  /* U+00D5 */  "O",   /* LATIN CAPITAL LETTER O WITH TILDE */
  /* U+00D6 */  "O",   /* LATIN CAPITAL LETTER O WITH DIAERESIS */
  /* U+00D7 */  "x",   /* MULTIPLICATION SIGN */
  /* U+00D8 */  "O",   /* LATIN CAPITAL LETTER O WITH STROKE */
  /* U+00D9 */  "U",   /* LATIN CAPITAL LETTER U WITH GRAVE */
  /* U+00DA */  "U",   /* LATIN CAPITAL LETTER U WITH ACUTE */
  /* U+00DB */  "U",   /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
  /* U+00DC */  "U",   /* LATIN CAPITAL LETTER U WITH DIAERESIS */
  /* U+00DD */  "Y",   /* LATIN CAPITAL LETTER Y WITH ACUTE */
  /* U+00DE */  "",    /* LATIN CAPITAL LETTER THORN */
  /* U+00DF */  "",    /* LATIN SMALL LETTER SHARP S */
  /* U+00E0 */  "a",   /* LATIN SMALL LETTER A WITH GRAVE */
  /* U+00E1 */  "a",   /* LATIN SMALL LETTER A WITH ACUTE */
  /* U+00E2 */  "a",   /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
  /* U+00E3 */  "a",   /* LATIN SMALL LETTER A WITH TILDE */
  /* U+00E4 */  "a",   /* LATIN SMALL LETTER A WITH DIAERESIS */
  /* U+00E5 */  "a",   /* LATIN SMALL LETTER A WITH RING ABOVE */
  /* U+00E6 */  "ae",  /* LATIN SMALL LETTER AE */
  /* U+00E7 */  "c",   /* LATIN SMALL LETTER C WITH CEDILLA */
  /* U+00E8 */  "e",   /* LATIN SMALL LETTER E WITH GRAVE */
  /* U+00E9 */  "e",   /* LATIN SMALL LETTER E WITH ACUTE */
  /* U+00EA */  "e",   /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
  /* U+00EB */  "e",   /* LATIN SMALL LETTER E WITH DIAERESIS */
  /* U+00EC */  "i",   /* LATIN SMALL LETTER I WITH GRAVE */
  /* U+00ED */  "i",   /* LATIN SMALL LETTER I WITH ACUTE */
  /* U+00EE */  "i",   /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
  /* U+00EF */  "i",   /* LATIN SMALL LETTER I WITH DIAERESIS */
  /* U+00F0 */  "",    /* LATIN SMALL LETTER ETH */
  /* U+00F1 */  "n",   /* LATIN SMALL LETTER N WITH TILDE */
  /* U+00F2 */  "o",   /* LATIN SMALL LETTER O WITH GRAVE */
  /* U+00F3 */  "o",   /* LATIN SMALL LETTER O WITH ACUTE */
  /* U+00F4 */  "o",   /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
  /* U+00F5 */  "o",   /* LATIN SMALL LETTER O WITH TILDE */
  /* U+00F6 */  "o",   /* LATIN SMALL LETTER O WITH DIAERESIS */
  /* U+00F7 */  "",    /* DIVISION SIGN */
  /* U+00F8 */  "o",   /* LATIN SMALL LETTER O WITH STROKE */
  /* U+00F9 */  "u",   /* LATIN SMALL LETTER U WITH GRAVE */
  /* U+00FA */  "u",   /* LATIN SMALL LETTER U WITH ACUTE */
  /* U+00FB */  "u",   /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
  /* U+00FC */  "u",   /* LATIN SMALL LETTER U WITH DIAERESIS */
  /* U+00FD */  "y",   /* LATIN SMALL LETTER Y WITH ACUTE */
  /* U+00FE */  "",    /* LATIN SMALL LETTER THORN */
  /* U+00FF */  "y",   /* LATIN SMALL LETTER Y WITH DIAERESIS */
  /* U+0100 */  "A",   /* LATIN CAPITAL LETTER A WITH MACRON */
  /* U+0101 */  "a",   /* LATIN SMALL LETTER A WITH MACRON */
  /* U+0102 */  "A",   /* LATIN CAPITAL LETTER A WITH BREVE */
  /* U+0103 */  "a",   /* LATIN SMALL LETTER A WITH BREVE */
  /* U+0104 */  "A",   /* LATIN CAPITAL LETTER A WITH OGONEK */
  /* U+0105 */  "a",   /* LATIN SMALL LETTER A WITH OGONEK */
  /* U+0106 */  "C",   /* LATIN CAPITAL LETTER C WITH ACUTE */
  /* U+0107 */  "c",   /* LATIN SMALL LETTER C WITH ACUTE */
  /* U+0108 */  "C",   /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */
  /* U+0109 */  "c",   /* LATIN SMALL LETTER C WITH CIRCUMFLEX */
  /* U+010A */  "C",   /* LATIN CAPITAL LETTER C WITH DOT ABOVE */
  /* U+010B */  "c",   /* LATIN SMALL LETTER C WITH DOT ABOVE */
  /* U+010C */  "C",   /* LATIN CAPITAL LETTER C WITH CARON */
  /* U+010D */  "c",   /* LATIN SMALL LETTER C WITH CARON */
  /* U+010E */  "D",   /* LATIN CAPITAL LETTER D WITH CARON */
  /* U+010F */  "d",   /* LATIN SMALL LETTER D WITH CARON */
  /* U+0110 */  "D",   /* LATIN CAPITAL LETTER D WITH STROKE */
  /* U+0111 */  "d",   /* LATIN SMALL LETTER D WITH STROKE */
  /* U+0112 */  "E",   /* LATIN CAPITAL LETTER E WITH MACRON */
  /* U+0113 */  "e",   /* LATIN SMALL LETTER E WITH MACRON */
  /* U+0114 */  "E",   /* LATIN CAPITAL LETTER E WITH BREVE */
  /* U+0115 */  "e",   /* LATIN SMALL LETTER E WITH BREVE */
  /* U+0116 */  "E",   /* LATIN CAPITAL LETTER E WITH DOT ABOVE */
  /* U+0117 */  "e",   /* LATIN SMALL LETTER E WITH DOT ABOVE */
  /* U+0118 */  "E",   /* LATIN CAPITAL LETTER E WITH OGONEK */
  /* U+0119 */  "e",   /* LATIN SMALL LETTER E WITH OGONEK */
  /* U+011A */  "E",   /* LATIN CAPITAL LETTER E WITH CARON */
  /* U+011B */  "e",   /* LATIN SMALL LETTER E WITH CARON */
  /* U+011C */  "G",   /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX */
  /* U+011D */  "g",   /* LATIN SMALL LETTER G WITH CIRCUMFLEX */
  /* U+011E */  "G",   /* LATIN CAPITAL LETTER G WITH BREVE */
  /* U+011F */  "g",   /* LATIN SMALL LETTER G WITH BREVE */
  /* U+0120 */  "G",   /* LATIN CAPITAL LETTER G WITH DOT ABOVE */
  /* U+0121 */  "g",   /* LATIN SMALL LETTER G WITH DOT ABOVE */
  /* U+0122 */  "G",   /* LATIN CAPITAL LETTER G WITH CEDILLA */
  /* U+0123 */  "g",   /* LATIN SMALL LETTER G WITH CEDILLA */
  /* U+0124 */  "H",   /* LATIN CAPITAL LETTER H WITH CIRCUMFLEX */
  /* U+0125 */  "h",   /* LATIN SMALL LETTER H WITH CIRCUMFLEX */
  /* U+0126 */  "H",   /* LATIN CAPITAL LETTER H WITH STROKE */
  /* U+0127 */  "h",   /* LATIN SMALL LETTER H WITH STROKE */
  /* U+0128 */  "I",   /* LATIN CAPITAL LETTER I WITH TILDE */
  /* U+0129 */  "i",   /* LATIN SMALL LETTER I WITH TILDE */
  /* U+012A */  "I",   /* LATIN CAPITAL LETTER I WITH MACRON */
  /* U+012B */  "i",   /* LATIN SMALL LETTER I WITH MACRON */
  /* U+012C */  "I",   /* LATIN CAPITAL LETTER I WITH BREVE */
  /* U+012D */  "i",   /* LATIN SMALL LETTER I WITH BREVE */
  /* U+012E */  "I",   /* LATIN CAPITAL LETTER I WITH OGONEK */
  /* U+012F */  "i",   /* LATIN SMALL LETTER I WITH OGONEK */
  /* U+0130 */  "I",   /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
  /* U+0131 */  "i",   /* LATIN SMALL LETTER DOTLESS I */
  /* U+0132 */  "IJ",  /* LATIN CAPITAL LIGATURE IJ */
  /* U+0133 */  "ij",  /* LATIN SMALL LIGATURE IJ */
  /* U+0134 */  "J",   /* LATIN CAPITAL LETTER J WITH CIRCUMFLEX */
  /* U+0135 */  "j",   /* LATIN SMALL LETTER J WITH CIRCUMFLEX */
  /* U+0136 */  "K",   /* LATIN CAPITAL LETTER K WITH CEDILLA */
  /* U+0137 */  "k",   /* LATIN SMALL LETTER K WITH CEDILLA */
  /* U+0138 */  "k",   /* LATIN SMALL LETTER KRA */
  /* U+0139 */  "L",   /* LATIN CAPITAL LETTER L WITH ACUTE */
  /* U+013A */  "l",   /* LATIN SMALL LETTER L WITH ACUTE */
  /* U+013B */  "L",   /* LATIN CAPITAL LETTER L WITH CEDILLA */
  /* U+013C */  "l",   /* LATIN SMALL LETTER L WITH CEDILLA */
  /* U+013D */  "L",   /* LATIN CAPITAL LETTER L WITH CARON */
  /* U+013E */  "l",   /* LATIN SMALL LETTER L WITH CARON */
  /* U+013F */  "L",   /* LATIN CAPITAL LETTER L WITH MIDDLE DOT */
  /* U+0140 */  "l",   /* LATIN SMALL LETTER L WITH MIDDLE DOT */
  /* U+0141 */  "L",   /* LATIN CAPITAL LETTER L WITH STROKE */
  /* U+0142 */  "l",   /* LATIN SMALL LETTER L WITH STROKE */
  /* U+0143 */  "N",   /* LATIN CAPITAL LETTER N WITH ACUTE */
  /* U+0144 */  "n",   /* LATIN SMALL LETTER N WITH ACUTE */
  /* U+0145 */  "N",   /* LATIN CAPITAL LETTER N WITH CEDILLA */
  /* U+0146 */  "n",   /* LATIN SMALL LETTER N WITH CEDILLA */
  /* U+0147 */  "N",   /* LATIN CAPITAL LETTER N WITH CARON */
  /* U+0148 */  "n",   /* LATIN SMALL LETTER N WITH CARON */
  /* U+0149 */  "n",   /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
  /* U+014A */  "N",   /* LATIN CAPITAL LETTER ENG */
  /* U+014B */  "n",   /* LATIN SMALL LETTER ENG */
  /* U+014C */  "O",   /* LATIN CAPITAL LETTER O WITH MACRON */
  /* U+014D */  "o",   /* LATIN SMALL LETTER O WITH MACRON */
  /* U+014E */  "O",   /* LATIN CAPITAL LETTER O WITH BREVE */
  /* U+014F */  "o",   /* LATIN SMALL LETTER O WITH BREVE */
  /* U+0150 */  "O",   /* LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */
  /* U+0151 */  "o",   /* LATIN SMALL LETTER O WITH DOUBLE ACUTE */
  /* U+0152 */  "OE",  /* LATIN CAPITAL LIGATURE OE */
  /* U+0153 */  "oe",  /* LATIN SMALL LIGATURE OE */
  /* U+0154 */  "R",   /* LATIN CAPITAL LETTER R WITH ACUTE */
  /* U+0155 */  "r",   /* LATIN SMALL LETTER R WITH ACUTE */
  /* U+0156 */  "R",   /* LATIN CAPITAL LETTER R WITH CEDILLA */
  /* U+0157 */  "r",   /* LATIN SMALL LETTER R WITH CEDILLA */
  /* U+0158 */  "R",   /* LATIN CAPITAL LETTER R WITH CARON */
  /* U+0159 */  "r",   /* LATIN SMALL LETTER R WITH CARON */
  /* U+015A */  "S",   /* LATIN CAPITAL LETTER S WITH ACUTE */
  /* U+015B */  "s",   /* LATIN SMALL LETTER S WITH ACUTE */
  /* U+015C */  "S",   /* LATIN CAPITAL LETTER S WITH CIRCUMFLEX */
  /* U+015D */  "s",   /* LATIN SMALL LETTER S WITH CIRCUMFLEX */
  /* U+015E */  "S",   /* LATIN CAPITAL LETTER S WITH CEDILLA */
  /* U+015F */  "s",   /* LATIN SMALL LETTER S WITH CEDILLA */
  /* U+0160 */  "S",   /* LATIN CAPITAL LETTER S WITH CARON */
  /* U+0161 */  "s",   /* LATIN SMALL LETTER S WITH CARON */
  /* U+0162 */  "T",   /* LATIN CAPITAL LETTER T WITH CEDILLA */
  /* U+0163 */  "t",   /* LATIN SMALL LETTER T WITH CEDILLA */
  /* U+0164 */  "T",   /* LATIN CAPITAL LETTER T WITH CARON */
  /* U+0165 */  "t",   /* LATIN SMALL LETTER T WITH CARON */
  /* U+0166 */  "T",   /* LATIN CAPITAL LETTER T WITH STROKE */
  /* U+0167 */  "t",   /* LATIN SMALL LETTER T WITH STROKE */
  /* U+0168 */  "U",   /* LATIN CAPITAL LETTER U WITH TILDE */
  /* U+0169 */  "u",   /* LATIN SMALL LETTER U WITH TILDE */
  /* U+016A */  "U",   /* LATIN CAPITAL LETTER U WITH MACRON */
  /* U+016B */  "u",   /* LATIN SMALL LETTER U WITH MACRON */
  /* U+016C */  "U",   /* LATIN CAPITAL LETTER U WITH BREVE */
  /* U+016D */  "u",   /* LATIN SMALL LETTER U WITH BREVE */
  /* U+016E */  "U",   /* LATIN CAPITAL LETTER U WITH RING ABOVE */
  /* U+016F */  "u",   /* LATIN SMALL LETTER U WITH RING ABOVE */
  /* U+0170 */  "U",   /* LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */
  /* U+0171 */  "u",   /* LATIN SMALL LETTER U WITH DOUBLE ACUTE */
  /* U+0172 */  "U",   /* LATIN CAPITAL LETTER U WITH OGONEK */
  /* U+0173 */  "u",   /* LATIN SMALL LETTER U WITH OGONEK */
  /* U+0174 */  "W",   /* LATIN CAPITAL LETTER W WITH CIRCUMFLEX */
  /* U+0175 */  "w",   /* LATIN SMALL LETTER W WITH CIRCUMFLEX */
  /* U+0176 */  "Y",   /* LATIN CAPITAL LETTER Y WITH CIRCUMFLEX */
  /* U+0177 */  "y",   /* LATIN SMALL LETTER Y WITH CIRCUMFLEX */
  /* U+0178 */  "Y",   /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
  /* U+0179 */  "Z",   /* LATIN CAPITAL LETTER Z WITH ACUTE */
  /* U+017A */  "z",   /* LATIN SMALL LETTER Z WITH ACUTE */
  /* U+017B */  "Z",   /* LATIN CAPITAL LETTER Z WITH DOT ABOVE */
  /* U+017C */  "z",   /* LATIN SMALL LETTER Z WITH DOT ABOVE */
  /* U+017D */  "Z",   /* LATIN CAPITAL LETTER Z WITH CARON */
  /* U+017E */  "z",   /* LATIN SMALL LETTER Z WITH CARON */
  /* U+017F */  "s",   /* LATIN SMALL LETTER LONG S */
  /* U+0180 */  "b",   /* LATIN SMALL LETTER B WITH STROKE */
  /* U+0181 */  "B",   /* LATIN CAPITAL LETTER B WITH HOOK */
  /* U+0182 */  "B",   /* LATIN CAPITAL LETTER B WITH TOPBAR */
  /* U+0183 */  "b",   /* LATIN SMALL LETTER B WITH TOPBAR */
  /* U+0184 */  "6",   /* LATIN CAPITAL LETTER TONE SIX */
  /* U+0185 */  "6",   /* LATIN SMALL LETTER TONE SIX */
  /* U+0186 */  "O",   /* LATIN CAPITAL LETTER OPEN O */
  /* U+0187 */  "C",   /* LATIN CAPITAL LETTER C WITH HOOK */
  /* U+0188 */  "c",   /* LATIN SMALL LETTER C WITH HOOK */
  /* U+0189 */  "D",   /* LATIN CAPITAL LETTER AFRICAN D */
  /* U+018A */  "D",   /* LATIN CAPITAL LETTER D WITH HOOK */
  /* U+018B */  "D",   /* LATIN CAPITAL LETTER D WITH TOPBAR */
  /* U+018C */  "d",   /* LATIN SMALL LETTER D WITH TOPBAR */
  /* U+018D */  "",    /* LATIN SMALL LETTER TURNED DELTA */
  /* U+018E */  "",    /* LATIN CAPITAL LETTER REVERSED E */
  /* U+018F */  "",    /* LATIN CAPITAL LETTER SCHWA */
  /* U+0190 */  "E",   /* LATIN CAPITAL LETTER OPEN E */
  /* U+0191 */  "F",   /* LATIN CAPITAL LETTER F WITH HOOK */
  /* U+0192 */  "f",   /* LATIN SMALL LETTER F WITH HOOK */
  /* U+0193 */  "G",   /* LATIN CAPITAL LETTER G WITH HOOK */
  /* U+0194 */  "",    /* LATIN CAPITAL LETTER GAMMA */
  /* U+0195 */  "",    /* LATIN SMALL LETTER HV */
  /* U+0196 */  "",    /* LATIN CAPITAL LETTER IOTA */
  /* U+0197 */  "I",   /* LATIN CAPITAL LETTER I WITH STROKE */
  /* U+0198 */  "K",   /* LATIN CAPITAL LETTER K WITH HOOK */
  /* U+0199 */  "k",   /* LATIN SMALL LETTER K WITH HOOK */
  /* U+019A */  "l",   /* LATIN SMALL LETTER L WITH BAR */
  /* U+019B */  "",    /* LATIN SMALL LETTER LAMBDA WITH STROKE */
  /* U+019C */  "M",   /* LATIN CAPITAL LETTER TURNED M */
  /* U+019D */  "N",   /* LATIN CAPITAL LETTER N WITH LEFT HOOK */
  /* U+019E */  "n",   /* LATIN SMALL LETTER N WITH LONG RIGHT LEG */
  /* U+019F */  "O",   /* LATIN CAPITAL LETTER O WITH MIDDLE TILDE */
  /* U+01A0 */  "O",   /* LATIN CAPITAL LETTER O WITH HORN */
  /* U+01A1 */  "o",   /* LATIN SMALL LETTER O WITH HORN */
  /* U+01A2 */  "",    /* LATIN CAPITAL LETTER OI */
  /* U+01A3 */  "",    /* LATIN SMALL LETTER OI */
  /* U+01A4 */  "P",   /* LATIN CAPITAL LETTER P WITH HOOK */
  /* U+01A5 */  "p",   /* LATIN SMALL LETTER P WITH HOOK */
  /* U+01A6 */  "",    /* LATIN LETTER YR */
  /* U+01A7 */  "2",   /* LATIN CAPITAL LETTER TONE TWO */
  /* U+01A8 */  "2",   /* LATIN SMALL LETTER TONE TWO */
  /* U+01A9 */  "",    /* LATIN CAPITAL LETTER ESH */
  /* U+01AA */  "",    /* LATIN LETTER REVERSED ESH LOOP */
  /* U+01AB */  "t",   /* LATIN SMALL LETTER T WITH PALATAL HOOK */
  /* U+01AC */  "T",   /* LATIN CAPITAL LETTER T WITH HOOK */
  /* U+01AD */  "t",   /* LATIN SMALL LETTER T WITH HOOK */
  /* U+01AE */  "T",   /* LATIN CAPITAL LETTER T WITH RETROFLEX HOOK */
  /* U+01AF */  "U",   /* LATIN CAPITAL LETTER U WITH HORN */
  /* U+01B0 */  "u",   /* LATIN SMALL LETTER U WITH HORN */
  /* U+01B1 */  "",    /* LATIN CAPITAL LETTER UPSILON */
  /* U+01B2 */  "V",   /* LATIN CAPITAL LETTER V WITH HOOK */
  /* U+01B3 */  "Y",   /* LATIN CAPITAL LETTER Y WITH HOOK */
  /* U+01B4 */  "y",   /* LATIN SMALL LETTER Y WITH HOOK */
  /* U+01B5 */  "Z",   /* LATIN CAPITAL LETTER Z WITH STROKE */
  /* U+01B6 */  "z",   /* LATIN SMALL LETTER Z WITH STROKE */
  /* U+01B7 */  "",    /* LATIN CAPITAL LETTER EZH */
  /* U+01B8 */  "",    /* LATIN CAPITAL LETTER EZH REVERSED */
  /* U+01B9 */  "",    /* LATIN SMALL LETTER EZH REVERSED */
  /* U+01BA */  "",    /* LATIN SMALL LETTER EZH WITH TAIL */
  /* U+01BB */  "2",   /* LATIN LETTER TWO WITH STROKE */
  /* U+01BC */  "5",   /* LATIN CAPITAL LETTER TONE FIVE */
  /* U+01BD */  "5",   /* LATIN SMALL LETTER TONE FIVE */
  /* U+01BE */  "",    /* LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE */
  /* U+01BF */  "",    /* LATIN LETTER WYNN */
  /* U+01C0 */  "",    /* LATIN LETTER DENTAL CLICK */
  /* U+01C1 */  "",    /* LATIN LETTER LATERAL CLICK */
  /* U+01C2 */  "",    /* LATIN LETTER ALVEOLAR CLICK */
  /* U+01C3 */  "",    /* LATIN LETTER RETROFLEX CLICK */
  /* U+01C4 */  "DZ",  /* LATIN CAPITAL LETTER DZ WITH CARON */
  /* U+01C5 */  "Dz",  /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON */
  /* U+01C6 */  "dz",  /* LATIN SMALL LETTER DZ WITH CARON */
  /* U+01C7 */  "LJ",  /* LATIN CAPITAL LETTER LJ */
  /* U+01C8 */  "Lj",  /* LATIN CAPITAL LETTER L WITH SMALL LETTER J */
  /* U+01C9 */  "lj",  /* LATIN SMALL LETTER LJ */
  /* U+01CA */  "NJ",  /* LATIN CAPITAL LETTER NJ */
  /* U+01CB */  "Nj",  /* LATIN CAPITAL LETTER N WITH SMALL LETTER J */
  /* U+01CC */  "nj",  /* LATIN SMALL LETTER NJ */
  /* U+01CD */  "A",   /* LATIN CAPITAL LETTER A WITH CARON */
  /* U+01CE */  "a",   /* LATIN SMALL LETTER A WITH CARON */
  /* U+01CF */  "I",   /* LATIN CAPITAL LETTER I WITH CARON */
  /* U+01D0 */  "i",   /* LATIN SMALL LETTER I WITH CARON */
  /* U+01D1 */  "O",   /* LATIN CAPITAL LETTER O WITH CARON */
  /* U+01D2 */  "o",   /* LATIN SMALL LETTER O WITH CARON */
  /* U+01D3 */  "U",   /* LATIN CAPITAL LETTER U WITH CARON */
  /* U+01D4 */  "u",   /* LATIN SMALL LETTER U WITH CARON */
  /* U+01D5 */  "U",   /* LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON */
  /* U+01D6 */  "u",   /* LATIN SMALL LETTER U WITH DIAERESIS AND MACRON */
  /* U+01D7 */  "U",   /* LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE */
  /* U+01D8 */  "u",   /* LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE */
  /* U+01D9 */  "U",   /* LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON */
  /* U+01DA */  "u",   /* LATIN SMALL LETTER U WITH DIAERESIS AND CARON */
  /* U+01DB */  "U",   /* LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE */
  /* U+01DC */  "u",   /* LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE */
  /* U+01DD */  "",    /* LATIN SMALL LETTER TURNED E */
  /* U+01DE */  "A",   /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
  /* U+01DF */  "a",   /* LATIN SMALL LETTER A WITH DIAERESIS AND MACRON */
  /* U+01E0 */  "A",   /* LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON */
  /* U+01E1 */  "a",   /* LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON */
  /* U+01E2 */  "AE",  /* LATIN CAPITAL LETTER AE WITH MACRON */
  /* U+01E3 */  "ae",  /* LATIN SMALL LETTER AE WITH MACRON */
  /* U+01E4 */  "G",   /* LATIN CAPITAL LETTER G WITH STROKE */
  /* U+01E5 */  "g",   /* LATIN SMALL LETTER G WITH STROKE */
  /* U+01E6 */  "G",   /* LATIN CAPITAL LETTER G WITH CARON */
  /* U+01E7 */  "g",   /* LATIN SMALL LETTER G WITH CARON */
  /* U+01E8 */  "K",   /* LATIN CAPITAL LETTER K WITH CARON */
  /* U+01E9 */  "k",   /* LATIN SMALL LETTER K WITH CARON */
  /* U+01EA */  "O",   /* LATIN CAPITAL LETTER O WITH OGONEK */
  /* U+01EB */  "o",   /* LATIN SMALL LETTER O WITH OGONEK */
  /* U+01EC */  "O",   /* LATIN CAPITAL LETTER O WITH OGONEK AND MACRON */
  /* U+01ED */  "o",   /* LATIN SMALL LETTER O WITH OGONEK AND MACRON */
  /* U+01EE */  "",    /* LATIN CAPITAL LETTER EZH WITH CARON */
  /* U+01EF */  "",    /* LATIN SMALL LETTER EZH WITH CARON */
  /* U+01F0 */  "j",   /* LATIN SMALL LETTER J WITH CARON */
  /* U+01F1 */  "DZ",  /* LATIN CAPITAL LETTER DZ */
  /* U+01F2 */  "Dz",  /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z */
  /* U+01F3 */  "dz",  /* LATIN SMALL LETTER DZ */
  /* U+01F4 */  "G",   /* LATIN CAPITAL LETTER G WITH ACUTE */
  /* U+01F5 */  "g",   /* LATIN SMALL LETTER G WITH ACUTE */
  /* U+01F6 */  "",    /* LATIN CAPITAL LETTER HWAIR */
  /* U+01F7 */  "",    /* LATIN CAPITAL LETTER WYNN */
  /* U+01F8 */  "N",   /* LATIN CAPITAL LETTER N WITH GRAVE */
  /* U+01F9 */  "n",   /* LATIN SMALL LETTER N WITH GRAVE */
  /* U+01FA */  "A",   /* LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE */
  /* U+01FB */  "a",   /* LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE */
  /* U+01FC */  "AE",  /* LATIN CAPITAL LETTER AE WITH ACUTE */
  /* U+01FD */  "ae",  /* LATIN SMALL LETTER AE WITH ACUTE */
  /* U+01FE */  "O",   /* LATIN CAPITAL LETTER O WITH STROKE AND ACUTE */
  /* U+01FF */  "o",   /* LATIN SMALL LETTER O WITH STROKE AND ACUTE */
  /* U+0200 */  "A",   /* LATIN CAPITAL LETTER A WITH DOUBLE GRAVE */
  /* U+0201 */  "a",   /* LATIN SMALL LETTER A WITH DOUBLE GRAVE */
  /* U+0202 */  "A",   /* LATIN CAPITAL LETTER A WITH INVERTED BREVE */
  /* U+0203 */  "a",   /* LATIN SMALL LETTER A WITH INVERTED BREVE */
  /* U+0204 */  "E",   /* LATIN CAPITAL LETTER E WITH DOUBLE GRAVE */
  /* U+0205 */  "e",   /* LATIN SMALL LETTER E WITH DOUBLE GRAVE */
  /* U+0206 */  "E",   /* LATIN CAPITAL LETTER E WITH INVERTED BREVE */
  /* U+0207 */  "e",   /* LATIN SMALL LETTER E WITH INVERTED BREVE */
  /* U+0208 */  "I",   /* LATIN CAPITAL LETTER I WITH DOUBLE GRAVE */
  /* U+0209 */  "i",   /* LATIN SMALL LETTER I WITH DOUBLE GRAVE */
  /* U+020A */  "I",   /* LATIN CAPITAL LETTER I WITH INVERTED BREVE */
  /* U+020B */  "i",   /* LATIN SMALL LETTER I WITH INVERTED BREVE */
  /* U+020C */  "O",   /* LATIN CAPITAL LETTER O WITH DOUBLE GRAVE */
  /* U+020D */  "o",   /* LATIN SMALL LETTER O WITH DOUBLE GRAVE */
  /* U+020E */  "O",   /* LATIN CAPITAL LETTER O WITH INVERTED BREVE */
  /* U+020F */  "o",   /* LATIN SMALL LETTER O WITH INVERTED BREVE */
  /* U+0210 */  "R",   /* LATIN CAPITAL LETTER R WITH DOUBLE GRAVE */
  /* U+0211 */  "r",   /* LATIN SMALL LETTER R WITH DOUBLE GRAVE */
  /* U+0212 */  "R",   /* LATIN CAPITAL LETTER R WITH INVERTED BREVE */
  /* U+0213 */  "r",   /* LATIN SMALL LETTER R WITH INVERTED BREVE */
  /* U+0214 */  "U",   /* LATIN CAPITAL LETTER U WITH DOUBLE GRAVE */
  /* U+0215 */  "u",   /* LATIN SMALL LETTER U WITH DOUBLE GRAVE */
  /* U+0216 */  "U",   /* LATIN CAPITAL LETTER U WITH INVERTED BREVE */
  /* U+0217 */  "u",   /* LATIN SMALL LETTER U WITH INVERTED BREVE */
  /* U+0218 */  "S",   /* LATIN CAPITAL LETTER S WITH COMMA BELOW */
  /* U+0219 */  "s",   /* LATIN SMALL LETTER S WITH COMMA BELOW */
  /* U+021A */  "T",   /* LATIN CAPITAL LETTER T WITH COMMA BELOW */
  /* U+021B */  "t",   /* LATIN SMALL LETTER T WITH COMMA BELOW */
  /* U+021C */  "",    /* LATIN CAPITAL LETTER YOGH */
  /* U+021D */  "",    /* LATIN SMALL LETTER YOGH */
  /* U+021E */  "H",   /* LATIN CAPITAL LETTER H WITH CARON */
  /* U+021F */  "h",   /* LATIN SMALL LETTER H WITH CARON */
  /* U+0220 */  "N",   /* LATIN CAPITAL LETTER N WITH LONG RIGHT LEG */
  /* U+0221 */  "d",   /* LATIN SMALL LETTER D WITH CURL */
  /* U+0222 */  "",    /* LATIN CAPITAL LETTER OU */
  /* U+0223 */  "",    /* LATIN SMALL LETTER OU */
  /* U+0224 */  "Z",   /* LATIN CAPITAL LETTER Z WITH HOOK */
  /* U+0225 */  "z",   /* LATIN SMALL LETTER Z WITH HOOK */
  /* U+0226 */  "A",   /* LATIN CAPITAL LETTER A WITH DOT ABOVE */
  /* U+0227 */  "a",   /* LATIN SMALL LETTER A WITH DOT ABOVE */
  /* U+0228 */  "E",   /* LATIN CAPITAL LETTER E WITH CEDILLA */
  /* U+0229 */  "e",   /* LATIN SMALL LETTER E WITH CEDILLA */
  /* U+022A */  "O",   /* LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON */
  /* U+022B */  "o",   /* LATIN SMALL LETTER O WITH DIAERESIS AND MACRON */
  /* U+022C */  "O",   /* LATIN CAPITAL LETTER O WITH TILDE AND MACRON */
  /* U+022D */  "o",   /* LATIN SMALL LETTER O WITH TILDE AND MACRON */
  /* U+022E */  "O",   /* LATIN CAPITAL LETTER O WITH DOT ABOVE */
  /* U+022F */  "o",   /* LATIN SMALL LETTER O WITH DOT ABOVE */
  /* U+0230 */  "O",   /* LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON */
  /* U+0231 */  "o",   /* LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON */
  /* U+0232 */  "Y",   /* LATIN CAPITAL LETTER Y WITH MACRON */
  /* U+0233 */  "y",   /* LATIN SMALL LETTER Y WITH MACRON */
  /* U+0234 */  "l",   /* LATIN SMALL LETTER L WITH CURL */
  /* U+0235 */  "n",   /* LATIN SMALL LETTER N WITH CURL */
  /* U+0236 */  "t",   /* LATIN SMALL LETTER T WITH CURL */
  /* U+0237 */  "j",   /* LATIN SMALL LETTER DOTLESS J */
  /* U+0238 */  "db",  /* LATIN SMALL LETTER DB DIGRAPH */
  /* U+0239 */  "qp",  /* LATIN SMALL LETTER QP DIGRAPH */
  /* U+023A */  "A",   /* LATIN CAPITAL LETTER A WITH STROKE */
  /* U+023B */  "C",   /* LATIN CAPITAL LETTER C WITH STROKE */
  /* U+023C */  "c",   /* LATIN SMALL LETTER C WITH STROKE */
  /* U+023D */  "L",   /* LATIN CAPITAL LETTER L WITH BAR */
  /* U+023E */  "T",   /* LATIN CAPITAL LETTER T WITH DIAGONAL STROKE */
  /* U+023F */  "s",   /* LATIN SMALL LETTER S WITH SWASH TAIL */
  /* U+0240 */  "z",   /* LATIN SMALL LETTER Z WITH SWASH TAIL */
  /* U+0241 */  "",    /* LATIN CAPITAL LETTER GLOTTAL STOP */
  /* U+0242 */  "",    /* LATIN SMALL LETTER GLOTTAL STOP */
  /* U+0243 */  "B",   /* LATIN CAPITAL LETTER B WITH STROKE */
  /* U+0244 */  "U",   /* LATIN CAPITAL LETTER U BAR */
  /* U+0245 */  "V",   /* LATIN CAPITAL LETTER TURNED V */
  /* U+0246 */  "E",   /* LATIN CAPITAL LETTER E WITH STROKE */
  /* U+0247 */  "e",   /* LATIN SMALL LETTER E WITH STROKE */
  /* U+0248 */  "J",   /* LATIN CAPITAL LETTER J WITH STROKE */
  /* U+0249 */  "j",   /* LATIN SMALL LETTER J WITH STROKE */
  /* U+024A */  "Q",   /* LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL */
  /* U+024B */  "q",   /* LATIN SMALL LETTER Q WITH HOOK TAIL */
  /* U+024C */  "R",   /* LATIN CAPITAL LETTER R WITH STROKE */
  /* U+024D */  "r",   /* LATIN SMALL LETTER R WITH STROKE */
  /* U+024E */  "Y",   /* LATIN CAPITAL LETTER Y WITH STROKE */
  /* U+024F */  "y",   /* LATIN SMALL LETTER Y WITH STROKE */
};

/*
 * Built-in transliteration table for codepoints beyond the range covered
 * by translit_low[]. It is searched linearly by translit_codepoint().
 *
 * The Roman numerals form the contiguous Unicode range U+2160..U+216B.
 */
struct translit translit[] = {
  { 0x2010, "-" },   /* HYPHEN                */
  { 0x2013, "-" },   /* EN DASH               */
  { 0x2019, "'" },   /* RIGHT SINGLE QUOTATION MARK */
  { 0x201A, "'" },   /* SINGLE LOW-9 QUOTATION MARK */
  { 0x2122, "(tm)" },/* TRADE MARK SIGN       */
  { 0x2160, "I" },   /* ROMAN NUMERAL ONE     */
  { 0x2161, "II" },  /* ROMAN NUMERAL TWO     */
  { 0x2162, "III" }, /* ROMAN NUMERAL THREE   */
  { 0x2163, "IV" },  /* ROMAN NUMERAL FOUR    */
  { 0x2164, "V" },   /* ROMAN NUMERAL FIVE    */
  { 0x2165, "VI" },  /* ROMAN NUMERAL SIX     */
  { 0x2166, "VII" }, /* ROMAN NUMERAL SEVEN   */
  { 0x2167, "VIII" },/* ROMAN NUMERAL EIGHT   */
  { 0x2168, "IX" },  /* ROMAN NUMERAL NINE    */
  { 0x2169, "X" },   /* ROMAN NUMERAL TEN     */
  { 0x216A, "XI" },  /* ROMAN NUMERAL ELEVEN  */
  { 0x216B, "XII" }, /* ROMAN NUMERAL TWELVE  */
};

/*
 * Table filled by load_translit() (-t option). While it is NULL the
 * built-in translit_low[] and translit[] tables are used instead.
 */
struct translit *translit_dyn = NULL;
/* Number of entries in translit_dyn, set from load_translit()'s result. */
size_t translit_dyn_len = 0;

/* Non-zero enables debug output (-d option). */
int debug = 0;
/* Non-zero: only print the renames, do not perform them (cleared by -r). */
int drymode = 1;

#ifndef HAVE_STRLCPY
/*
 * Fallback strlcpy(3) for systems that do not provide one.
 *
 * Copies at most size - 1 bytes of src into dst and always NUL-terminates
 * dst when size is non-zero. Nothing is written when size is 0.
 *
 * Returns strlen(src), so a result >= size tells the caller the copy
 * was truncated.
 */
size_t
strlcpy(char *dst, const char *src, size_t size)
{
	size_t srclen = strlen(src);

	if (size != 0) {
		size_t ncopy = (srclen < size - 1) ? srclen : size - 1;

		(void)memcpy(dst, src, ncopy);
		dst[ncopy] = '\0';
	}

	return srclen;
}
#endif /* !HAVE_STRLCPY */

/*
 * Print a one-line description of an fts entry on stdout: a letter for
 * the entry kind, the raw fts_info value, and the path, access path and
 * name fields.
 */
void
debug_ftsent(FTSENT *ftsent)
{
	int info = ftsent->fts_info;
	char kind = '?';	/* anything not listed below */

	if (info == FTS_F)
		kind = 'f';
	else if (info == FTS_SL || info == FTS_SLNONE)
		kind = 'l';
	else if (info == FTS_D)
		kind = 'd';
	else if (info == FTS_DP)
		kind = 'D';

	printf("debug> %c[%d] path=\"%s\" accpath=\"%s\", name=\"%s\"\n",
	       kind, info, ftsent->fts_path,
	       ftsent->fts_accpath, ftsent->fts_name);
}

/*
 * Dump len bytes of str on stderr, 16 bytes per row: the row offset in
 * hexadecimal, the bytes in hexadecimal, then the same bytes as text
 * with non-printable ones shown as '.'. Short rows are padded so the
 * columns stay aligned.
 */
void
hexdump(char *str, size_t len)
{
	/*
	 * Work on unsigned bytes: <ctype.h> functions are only defined for
	 * values representable as unsigned char (or EOF), and this function
	 * is called on non-ASCII file names.
	 */
	const unsigned char *bytes = (const unsigned char *)str;
	size_t off, col;

	for (off = 0; off < len; off += 16) {
		fprintf(stderr, "%04x  ", (unsigned int)off);

		for (col = 0; col < 16 && off + col < len; col++)
			fprintf(stderr, "%02x ", (unsigned int)bytes[off + col]);

		for (; col < 16; col++)
			fprintf(stderr, "   ");

		fprintf(stderr, "  ");

		for (col = 0; col < 16 && off + col < len; col++)
			fprintf(stderr, "%c",
				isprint(bytes[off + col]) ?
				bytes[off + col] : '.');

		for (; col < 16; col++)
			fprintf(stderr, " ");

		fprintf(stderr, "\n");
	}
}

/*
 * Parse exactly `digits` hexadecimal characters starting at hex.
 *
 * num:    receives the parsed value on success, 0 on failure.
 * hex:    input characters; no NUL terminator is required, but parsing
 *         stops at the first non-hexadecimal character (NUL included).
 * digits: number of characters to consume. When more digits are given
 *         than fit in an unsigned int, only the low-order bits are kept.
 *
 * Returns 0 on success, 1 if a non-hexadecimal character is met.
 */
int
hex2num(unsigned int *num, char *hex, size_t digits)
{
	unsigned int value = 0;
	size_t i;

	*num = 0;
	for (i = 0; i < digits; i++) {
		unsigned char c = (unsigned char)hex[i];
		unsigned int nibble;

		if (c >= '0' && c <= '9')
			nibble = c - '0';
		else if (c >= 'A' && c <= 'F')
			nibble = c - 'A' + 10;
		else if (c >= 'a' && c <= 'f')
			nibble = c - 'a' + 10;
		else
			return 1;

		/* unsigned arithmetic: no overflow trouble on the top nibble */
		value = (value << 4) | nibble;
	}

	*num = value;
	return 0;
}

/*
 * Copy in_len characters of instr to outstr, replacing every "\xNN"
 * escape (backslash, 'x', two hexadecimal digits) by the byte it
 * encodes. outstr is NUL-terminated on success.
 *
 * Returns 0 on success, 1 when a backslash is not followed by a complete
 * and valid "xNN" sequence inside the first in_len characters.
 */
int
unescape(char *outstr, char *instr, size_t in_len)
{
	size_t rd = 0;
	size_t wr = 0;

	while (rd < in_len) {
		char c = instr[rd];

		if (c == 0x5c) {
			unsigned int value = 0;

			/* the whole 4-character escape must lie in the input */
			if (rd + 3 >= in_len)
				return 1;

			if (instr[rd + 1] != 'x')
				return 1;

			if (hex2num(&value, instr + rd + 2, 2) != 0)
				return 1;

			c = (char)value;
			rd += 3;	/* skip "xNN"; the backslash is skipped below */
		}

		outstr[wr++] = c;
		rd++;
	}

	outstr[wr] = '\0';

	return 0;
}

/*
 * Load a transliteration table from `file` into translit_dyn.
 *
 * Each line must read "U+XXXX<TAB>replacement<NL>", where XXXX is four
 * hexadecimal digits and the replacement may contain "\xNN" escapes
 * (see unescape()). Every line, the last one included, must end with a
 * newline.
 *
 * Any access, allocation or syntax error terminates the program through
 * err()/errx(). Returns the number of entries loaded.
 */
int
load_translit(char *file)
{
	FILE *f;
	struct stat st;
	size_t lines_max;
	size_t lines_count = 0;
	char line[LINE_MAX + 1];

	if (stat(file, &st) != 0)
		err(1, "Cannot access file \"%s\"", file);

	/*
	 * The shortest valid line, "U+0063\t\n", is 8 bytes long, so
	 * dividing the file size by 7 over-estimates the line count.
	 * One spare entry avoids a zero-sized allocation for tiny files.
	 */
	lines_max = (size_t)st.st_size / 7 + 1;
	if ((translit_dyn = calloc(lines_max, sizeof(*translit_dyn))) == NULL)
		err(1, "calloc failed (%zu lines in \"%s\")", lines_max, file);

	if ((f = fopen(file, "r")) == NULL)
		err(1, "Cannot open file \"%s\"", file);

	while (fgets(line, LINE_MAX, f) != NULL) {
		char *cr;
		size_t line_len;
		unsigned int codepoint = 0;
		struct translit *t;

		/* the table was sized from stat(); never write past it */
		if (lines_count >= lines_max)
			errx(1, "\"%s\" has more lines than expected "
			     "(file modified while reading?)", file);

		t = &translit_dyn[lines_count];
		lines_count++;

		if ((cr = strchr(line, (int)'\n')) == NULL)
			errx(1, "parse error at line %zu in \"%s\": "
			     "runaway line", lines_count, file);

		/* length of the line, trailing newline included */
		line_len = (size_t)(cr - line) + 1;

		if (line[0] != 'U' || line[1] != '+')
			errx(1, "syntax error at line %zu in \"%s\": "
			     "\"U+\" expected here at line start",
			     lines_count, file);

		/* "U+XXXX\t\n" is the shortest line the parsing below handles */
		if (line_len < 8)
			errx(1, "syntax error at line %zu in \"%s\": "
			     "line too short", lines_count, file);

		if (hex2num(&codepoint, line + 2, 4) != 0)
			errx(1, "syntax error at line %zu in \"%s\": "
			     "bad hexadecimal value for codepoint",
			     lines_count, file);

		if (line[6] != '\t')
			errx(1, "syntax error at line %zu in \"%s\": "
			     "tabulation expected as separator",
			     lines_count, file);

		t->codepoint = (uint16_t)codepoint;
		/* replacement is line_len - 8 characters at most, plus NUL */
		t->replace = malloc(line_len - 7);
		if (t->replace == NULL)
			errx(1, "malloc failed for line length %zu "
			     "at line %zu in \"%s\"",
			     line_len, lines_count, file);

		if (unescape(t->replace, line + 7,
			     line_len - 8) != 0)
			errx(1, "syntax error at line %zu in \"%s\": "
			     "bad replacement string (escape error?)",
			     lines_count, file);
	}

	/* fgets() returns NULL on read errors as well as on end of file */
	if (ferror(f))
		err(1, "Cannot read file \"%s\"", file);

	fclose(f);

	return (int)lines_count;
}

/*
 * Write one table entry to f in the format read by load_translit():
 * "U+XXXX<TAB>replacement<NL>".
 *
 * Bytes of the replacement that are <= 0x20, >= 0x7f or a backslash are
 * written as "\xNN" escapes. Bytes are handled as unsigned so that those
 * above 0x7f yield exactly two hexadecimal digits, which is what
 * unescape() expects.
 */
void
dump_translit_codepoint(FILE *f, uint16_t codepoint, char *replace)
{
	const unsigned char *p;

	fprintf(f, "U+%04X\t", codepoint);
	for (p = (const unsigned char *)replace; *p != '\0'; p++) {
		if (*p <= 0x20 || *p == 0x5c || *p >= 0x7f)
			fprintf(f, "\\x%02x", (unsigned int)*p);
		else
			fprintf(f, "%c", *p);
	}
	fprintf(f, "\n");
}

/*
 * Write the active transliteration table to f, one entry per line.
 *
 * When a table was loaded into translit_dyn only that table is written.
 * Otherwise the built-in tables are written: the non-empty entries of
 * translit_low[] (the index is the codepoint), then all of translit[].
 */
void
dump_translit(FILE *f)
{
	const size_t low_count = sizeof(translit_low) / sizeof(*translit_low);
	const size_t high_count = sizeof(translit) / sizeof(*translit);
	size_t idx;

	if (translit_dyn == NULL) {
		for (idx = 0; idx < low_count; idx++) {
			if (translit_low[idx][0] == '\0')
				continue;	/* no replacement defined */

			dump_translit_codepoint(f, (uint16_t)idx,
						translit_low[idx]);
		}

		for (idx = 0; idx < high_count; idx++)
			dump_translit_codepoint(f, translit[idx].codepoint,
						translit[idx].replace);
	} else {
		for (idx = 0; idx < translit_dyn_len; idx++)
			dump_translit_codepoint(f, translit_dyn[idx].codepoint,
						translit_dyn[idx].replace);
	}
}

/*
 * Append the string `append` to the buffer dst, which currently holds
 * *dstlen bytes. No NUL terminator is written; *dstlen is advanced by
 * the number of bytes copied.
 *
 * Returns the number of bytes appended (0 for an empty string), or
 * (size_t)-1 after printing a message on stderr when the result would
 * exceed PATH_MAX bytes; dst and *dstlen are then left untouched.
 */
size_t
append(char *dst, size_t *dstlen, char *append)
{
	size_t addlen = strlen(append);

	if (addlen == 0)
		return 0;

	if (*dstlen + addlen > PATH_MAX) {
		fprintf(stderr, "path too long, skip characters \"%s\"\n", dst);
		return (size_t)-1;
	}

	(void)memcpy(dst + *dstlen, append, addlen);
	*dstlen += addlen;

	return addlen;
}

/*
 * Look codepoint up in the first t_len entries of table t and append the
 * replacement of the first match to outstr through append().
 *
 * Returns what append() returned for the match, or (size_t)-1 when the
 * codepoint is not in the table.
 */
size_t
translit_codepoint(char *outstr, size_t *out_len,
		   uint16_t codepoint, struct translit *t, size_t t_len)
{
	size_t idx;

	for (idx = 0; idx < t_len; idx++) {
		if (t[idx].codepoint != codepoint)
			continue;

		return append(outstr, out_len, t[idx].replace);
	}

	return (size_t)-1;
}


/*
 * Find an unused alternative name for path by trying the suffixes ".1"
 * to ".98" in order, and store the first one for which access(2)
 * reports no existing file back into path.
 *
 * path must point to a buffer of at least PATH_MAX bytes. A candidate
 * that does not fit in PATH_MAX - 1 characters is never used: a
 * truncated name would designate a different file.
 *
 * Returns 0 when path was replaced, -1 (path unchanged) when no
 * candidate fits or all of them exist.
 */
int
alt_path(char *path)
{
	char alt[PATH_MAX + 1];
	int i;

	for (i = 1; i < 99; i++) {
		int written = snprintf(alt, sizeof(alt), "%s.%d", path, i);

		/* longer suffixes cannot fit either, so give up at once */
		if (written < 0 || written >= PATH_MAX)
			return -1;

		if (access(alt, F_OK) != 0) {
			(void)memcpy(path, alt, (size_t)written + 1);
			return 0;
		}
	}

	return -1;
}

/*
 * Rename the last component of src_rootpath to its transliteration.
 *
 * The base name is converted to UCS-2 with the iconv descriptor cd, then
 * each code unit is replaced using translit_dyn when a table was loaded,
 * or the built-in translit_low[] / translit[] tables otherwise. Code
 * units without a replacement are reported on stderr and dropped.
 *
 * Nothing is done when the name is unchanged. If the target already
 * exists, alt_path() is asked for an alternative name. The "mv" line is
 * always printed on stdout; rename(2) is only called when drymode is 0.
 *
 * NOTE(review): a name whose characters are all dropped gives an empty
 * target name, which then goes through the "target exists" path; confirm
 * this is intended.
 *
 * Returns 0 on success (or nothing to do), 1 on conversion or rename
 * failure.
 */
int
asciify(char *src_rootpath, iconv_t cd)
{
	int error = 1;
	char tmp[PATH_MAX + 1] = "";
	char cwd[PATH_MAX + 1] = "";
	char src[PATH_MAX + 1] = "";
	size_t srclen;
	const char *constsrc;
	size_t ucs2len;
	/*
	 * iconv() output is stored straight into a uint16_t array so that
	 * reading the code units is properly aligned and typed. The array
	 * holds the same 2 * PATH_MAX bytes as before.
	 */
	uint16_t ucs2str[PATH_MAX] = { 0 };
	size_t dstlen = sizeof(ucs2str);
	char asciistr[PATH_MAX + 1] = "";
	size_t asciilen = 0;
	char *next;
	size_t remain;
	size_t errconv;
	size_t i;
	int written;
	char dst_rootpath[PATH_MAX + 1] = "";

	/* dirname() and basename() may modify their argument: use a copy */
	(void)strlcpy(tmp, src_rootpath, PATH_MAX);
	(void)strlcpy(cwd, dirname(tmp), PATH_MAX);

	(void)strlcpy(tmp, src_rootpath, PATH_MAX);
	(void)strlcpy(src, basename(tmp), PATH_MAX);

	constsrc = src;
	srclen = strlen(src);
	next = (char *)ucs2str;
	remain = dstlen;

	errconv = iconv(cd,
			(ICONV_CONST char **restrict)&constsrc, &srclen,
			(char **restrict)&next, &remain);
	if (errconv == (size_t)-1) {
		fprintf(stderr, "iconv failed: %s\n", strerror(errno));
		fprintf(stderr, "original string: \"%s\"\n", src);
		hexdump(src, strlen(src));
		goto out;
	}
	if (errconv != 0) {
		/* a positive result counts non-reversible conversions */
		fprintf(stderr, "invalid iconv conversion for \"%s\" at 0x%x:",
			src, (unsigned int)(dstlen - remain));
		fprintf(stderr, " original string: \"%s\"\n", src);
		hexdump(src, strlen(src));
		goto out;
	}

	ucs2len = (dstlen - remain) / 2; /* 2 bytes per char for UCS-2 */

	for (i = 0; i < ucs2len; i++) {
		size_t appendlen;

		if (translit_dyn != NULL) {
			/* a loaded table replaces the built-in ones entirely */
			appendlen = translit_codepoint(asciistr, &asciilen,
				       ucs2str[i],
				       translit_dyn,
				       translit_dyn_len);
			if (appendlen == (size_t)-1)
				fprintf(stderr,
					"char U+%04x lost in translation "
					"at index %zu of path=\"%s\"\n",
					ucs2str[i], i, src);
			continue;
		}

		if (ucs2str[i] < sizeof(translit_low) / sizeof(*translit_low)) {
			/* translit_low[] is indexed directly by codepoint */
			appendlen = append(asciistr, &asciilen,
					   translit_low[ucs2str[i]]);
			if (appendlen == (size_t)-1)
				break;

			continue;
		}

		appendlen = translit_codepoint(asciistr, &asciilen,
			       ucs2str[i],
			       translit,
			       sizeof(translit) / sizeof(*translit));
		if (appendlen == (size_t)-1)
			fprintf(stderr,
				"char U+%04x lost in translation "
				"at index %zu of path=\"%s\"\n",
				ucs2str[i], i, src);
	}

	asciistr[asciilen] = '\0';

	/* already plain: nothing to rename */
	if (strcmp(src, asciistr) == 0) {
		error = 0;
		goto out;
	}

	/* the two names are known to differ at this point */
	if (debug) {
		warnx("debug> src=\"%s\"\n", src);
		warnx("debug> asciistr=\"%s\"\n", asciistr);
	}

	/* never rename to a silently truncated target path */
	written = snprintf(dst_rootpath, PATH_MAX, "%s/%s", cwd, asciistr);
	if (written < 0 || written >= PATH_MAX) {
		fprintf(stderr, "Cannot rename \"%s\": target path too long\n",
			src_rootpath);
		goto out;
	}

	if (access(dst_rootpath, F_OK) == 0) {
		fprintf(stderr, "Cannot rename \"%s\" to \"%s\": "
			"target exists. Trying alternative.\n",
			src_rootpath, dst_rootpath);

		if (alt_path(dst_rootpath) != 0) {
			fprintf(stderr, "Cannot rename \"%s\" to \"%s\": "
				"target exists. no alternative found.\n",
				src_rootpath, dst_rootpath);
			goto out;
		}

		fprintf(stderr,
			"rename \"%s\" to alternative \"%s\" instead.\n",
			src_rootpath, dst_rootpath);
	}

	printf("%smv \"%s\" \"%s\"\n",
	       drymode ? "# " : "", src_rootpath, dst_rootpath);

	if (!drymode) {
		if (rename(src_rootpath, dst_rootpath) != 0) {
			fprintf(stderr, "rename failed \"%s\" to \"%s\": %s\n",
				src_rootpath, dst_rootpath, strerror(errno));
			goto out;
		}
	}

	error = 0;
out:
	return error;
}

/*
 * Print the two command-line synopses on stderr and exit with status 1.
 */
void
usage(char *progname)
{
	fprintf(stderr,
		"usage: %s [-H | -L | -P] [-dr] [-t translit_file] "
		"[-c charset] paths ...\n"
		"       %s [-t translit_file] -l\n",
		progname, progname);

	exit(1);
}

/*
 * Entry point.
 *
 * Options:
 *   -H / -L / -P  symbolic link handling during traversal (the last one
 *                 given wins; physical traversal is the default)
 *   -c charset    encoding of the file names (default "utf-8")
 *   -d            debug output
 *   -l            dump the active transliteration table and exit
 *   -r            really rename (the default is a dry run)
 *   -t file       load the transliteration table from file
 *
 * Every remaining argument is walked with fts(3) and each entry is
 * handed to asciify(). Directories are processed in post-order, after
 * their content. Traversal stops at the first asciify() failure.
 *
 * Returns 0 on success, non-zero otherwise.
 */
int
main(int argc, char *const *argv)
{
	char *progname = argv[0];
	int Hflag = 0;
	int Lflag = 0;
	int do_dump_translit = 0;
	char *charset = "utf-8";
	char *ucs2_charset = NULL;
	uint16_t bom = 0xfeff;
	iconv_t cd;
	int error = 0;
	FTS *fts;
	FTSENT *ftsent;
	int ftsopt;
	int ch;

	while ((ch = getopt(argc, argv, "HLPc:dlrt:")) != -1) {
		switch (ch) {
		case 'H':
			Hflag = 1;
			Lflag = 0;
			break;
		case 'L':
			Hflag = 0;
			Lflag = 1;
			break;
		case 'P':
			Hflag = 0;
			Lflag = 0;
			break;
		case 'c':
			charset = optarg;
			break;
		case 'd':
			debug = 1;
			break;
		case 'l':
			do_dump_translit = 1;
			break;
		case 'r':
			drymode = 0;
			break;
		case 't':
			translit_dyn_len = load_translit(optarg);
			break;
		default:
			usage(progname);
			break;
		}
	}

	argc -= optind;
	argv += optind;

	if (do_dump_translit) {
		dump_translit(stdout);
		exit(0);
	}

	if (argc == 0)
		usage(progname);

	/*
	 * asciify() reads the converted text as native uint16_t values, so
	 * ask iconv for UCS-2 in host byte order.
	 * U+FEFF is fe ff on big Endian, ff fe on little Endian
	 */
	if (((unsigned char *)&bom)[0] == 0xfe)
		ucs2_charset = "ucs-2be";
	else
		ucs2_charset = "ucs-2le";

	if ((cd = iconv_open(ucs2_charset, charset)) == (iconv_t)-1)
		err(1, "iconv_open(\"%s\", \"%s\") failed",
		    ucs2_charset, charset);

	ftsopt = FTS_NOSTAT | FTS_NOCHDIR;
	ftsopt |= Lflag ? FTS_LOGICAL : FTS_PHYSICAL;
	ftsopt |= Hflag ? FTS_COMFOLLOW : 0;

	if ((fts = fts_open(argv, ftsopt, NULL)) == NULL)
		err(1, "fts_open failed");

	for (;;) {
		/*
		 * fts_read() returns NULL both at the end of the traversal
		 * and on error; only errno tells the two apart.
		 */
		errno = 0;
		if ((ftsent = fts_read(fts)) == NULL) {
			if (errno != 0) {
				warn("fts_read failed");
				error = 1;
			}
			break;
		}

		if (debug)
			debug_ftsent(ftsent);

		switch(ftsent->fts_info) {
		case FTS_D:
			/* pre-order visit: the rename is done on FTS_DP */
			break;
		case FTS_DP: /* FALLTHROUGH */
		case FTS_F: /* FALLTHROUGH */
		case FTS_NSOK: /* FALLTHROUGH */
		case FTS_SL: /* FALLTHROUGH */
		case FTS_SLNONE:
			if ((error = asciify(ftsent->fts_path, cd)) != 0)
				goto out;
			break;
		case FTS_DNR: /* FALLTHROUGH */
		default:
			fprintf(stderr, "skip ftsent->fts_info = %d\n",
				ftsent->fts_info);
			break;
		}
	}

out:
	(void)fts_close(fts);
	(void)iconv_close(cd);

	if (drymode)
		fprintf(stderr, "default dry mode: use -r to rename files\n");

	return error;
}
