Subject: Patch: Encoding Manager
From: Andrew Dunbar (hippietrail@yahoo.com)
Date: Sun Jun 03 2001 - 02:32:50 CDT
This patch is mainly a large extension to the approximate() method.
It's certainly not complete yet though...
There are also a couple of minor changes and a few comments that
should be looked at.
Andrew Dunbar.
-- http://linguaphile.sourceforge.net
Index: src/af/xap/xp/xap_EncodingManager.cpp =================================================================== RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.cpp,v retrieving revision 1.33 diff -u -r1.33 xap_EncodingManager.cpp --- src/af/xap/xp/xap_EncodingManager.cpp 2001/05/25 05:52:12 1.33 +++ src/af/xap/xp/xap_EncodingManager.cpp 2001/06/03 07:18:20 @@ -81,6 +81,8 @@ char XAP_EncodingManager::fallbackChar(UT_UCSChar c) const { + // TODO shouldn't we return U+FFFD "REPLACEMENT CHARACTER" + // TODO or U+25A0 "BLACK SQUARE" for Unicode? return '?'; } @@ -90,12 +92,878 @@ { if (max_length==0) return 0; - if (max_length==1) + if (max_length>=3) { switch (c) { - case 0x201d: - case 0x201c: + case 0x00A9: // COPYRIGHT SIGN + case 0x24B8: // CIRCLED LATIN CAPITAL LETTER C + strcpy(out,"(C)"); return 3; + case 0x00AE: // REGISTERED SIGN + case 0x24C7: // CIRCLED LATIN CAPITAL LETTER R + strcpy(out,"(R)"); return 3; + case 0xFB03: // LATIN SMALL LIGATURE FFI + strcpy(out,"ffi"); return 3; + case 0xFB04: // LATIN SMALL LIGATURE FFL + strcpy(out,"ffl"); return 3; + } + } + if (max_length>=2) + { + switch (c) + { + case 0x00C6: // LATIN CAPITAL LETTER AE + case 0x01E2: // LATIN CAPITAL LETTER AE WITH MACRON + case 0x01FC: // LATIN CAPITAL LETTER AE WITH ACUTE + strcpy(out,"AE"); return 2; + case 0x00DF: // LATIN SMALL LETTER SHARP S + strcpy(out,"ss"); return 2; + case 0x00E6: // LATIN SMALL LETTER AE + case 0x01E3: // LATIN SMALL LETTER AE WITH MACRON + case 0x01FD: // LATIN SMALL LETTER AE WITH ACUTE + strcpy(out,"ae"); return 2; + case 0x0132: // LATIN CAPITAL LIGATURE IJ + strcpy(out,"IJ"); return 2; + case 0x0133: // LATIN SMALL LIGATURE IJ + strcpy(out,"ij"); return 2; + case 0x0152: // LATIN CAPITAL LIGATURE OE + strcpy(out,"OE"); return 2; + case 0x0153: // LATIN SMALL LIGATURE OE + strcpy(out,"oe"); return 2; + case 0xFB00: // LATIN SMALL LIGATURE FF + strcpy(out,"ff"); return 2; + case 0xFB01: // LATIN SMALL LIGATURE FI + strcpy(out,"fi"); return 2; + 
case 0xFB02: // LATIN SMALL LIGATURE FL + strcpy(out,"fl"); return 2; + case 0xFB05: // LATIN SMALL LIGATURE LONG S T + case 0xFB06: // LATIN SMALL LIGATURE ST + strcpy(out,"st"); return 2; + } + } + if (max_length>=1) + { + switch (c) + { + case 0x00C0: // LATIN CAPITAL LETTER A WITH GRAVE + case 0x00C1: // LATIN CAPITAL LETTER A WITH ACUTE + case 0x00C2: // LATIN CAPITAL LETTER A WITH CIRCUMFLEX + case 0x00C3: // LATIN CAPITAL LETTER A WITH TILDE + case 0x00C4: // LATIN CAPITAL LETTER A WITH DIAERESIS + case 0x00C5: // LATIN CAPITAL LETTER A WITH RING ABOVE + case 0x0100: // LATIN CAPITAL LETTER A WITH MACRON + case 0x0102: // LATIN CAPITAL LETTER A WITH BREVE + case 0x0104: // LATIN CAPITAL LETTER A WITH OGONEK + case 0x01CD: // LATIN CAPITAL LETTER A WITH CARON + case 0x01DE: // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + case 0x01E0: // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON + case 0x01FA: // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE + case 0x0200: // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE + case 0x0202: // LATIN CAPITAL LETTER A WITH INVERTED BREVE + case 0x1E00: // LATIN CAPITAL LETTER A WITH RING BELOW + case 0x1EA0: // LATIN CAPITAL LETTER A WITH DOT BELOW + case 0x1EA2: // LATIN CAPITAL LETTER A WITH HOOK ABOVE + case 0x1EA4: // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + case 0x1EA6: // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + case 0x1EA8: // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EAA: // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + case 0x1EAC: // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + case 0x1EAE: // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + case 0x1EB0: // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + case 0x1EB2: // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + case 0x1EB4: // LATIN CAPITAL LETTER A WITH BREVE AND TILDE + case 0x1EB6: // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + case 0xFF21: // FULLWIDTH LATIN CAPITAL LETTER A + *out = 'A'; 
return 1; + case 0x0181: // LATIN CAPITAL LETTER B WITH HOOK + case 0x0182: // LATIN CAPITAL LETTER B WITH TOPBAR + case 0x1E02: // LATIN CAPITAL LETTER B WITH DOT ABOVE + case 0x1E04: // LATIN CAPITAL LETTER B WITH DOT BELOW + case 0x1E06: // LATIN CAPITAL LETTER B WITH LINE BELOW + case 0xFF22: // FULLWIDTH LATIN CAPITAL LETTER B + *out = 'B'; return 1; + case 0x00C7: // LATIN CAPITAL LETTER C WITH CEDILLA + case 0x0106: // LATIN CAPITAL LETTER C WITH ACUTE + case 0x0108: // LATIN CAPITAL LETTER C WITH CIRCUMFLEX + case 0x010A: // LATIN CAPITAL LETTER C WITH DOT ABOVE + case 0x010C: // LATIN CAPITAL LETTER C WITH CARON + case 0x0187: // LATIN CAPITAL LETTER C WITH HOOK + case 0x1E08: // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + case 0xFF23: // FULLWIDTH LATIN CAPITAL LETTER C + *out = 'C'; return 1; + case 0x010E: // LATIN CAPITAL LETTER D WITH CARON + case 0x0110: // LATIN CAPITAL LETTER D WITH STROKE + case 0x018A: // LATIN CAPITAL LETTER D WITH HOOK + case 0x018B: // LATIN CAPITAL LETTER D WITH TOPBAR + case 0x1E0A: // LATIN CAPITAL LETTER D WITH DOT ABOVE + case 0x1E0C: // LATIN CAPITAL LETTER D WITH DOT BELOW + case 0x1E0E: // LATIN CAPITAL LETTER D WITH LINE BELOW + case 0x1E10: // LATIN CAPITAL LETTER D WITH CEDILLA + case 0x1E12: // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW + case 0xFF24: // FULLWIDTH LATIN CAPITAL LETTER D + *out = 'D'; return 1; + case 0x00C8: // LATIN CAPITAL LETTER E WITH GRAVE + case 0x00C9: // LATIN CAPITAL LETTER E WITH ACUTE + case 0x00CA: // LATIN CAPITAL LETTER E WITH CIRCUMFLEX + case 0x00CB: // LATIN CAPITAL LETTER E WITH DIAERESIS + case 0x0112: // LATIN CAPITAL LETTER E WITH MACRON + case 0x0114: // LATIN CAPITAL LETTER E WITH BREVE + case 0x0116: // LATIN CAPITAL LETTER E WITH DOT ABOVE + case 0x0118: // LATIN CAPITAL LETTER E WITH OGONEK + case 0x011A: // LATIN CAPITAL LETTER E WITH CARON + case 0x0204: // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE + case 0x0206: // LATIN CAPITAL LETTER E WITH INVERTED BREVE + case 
0x1E14: // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + case 0x1E16: // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + case 0x1E18: // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW + case 0x1E1A: // LATIN CAPITAL LETTER E WITH TILDE BELOW + case 0x1E1C: // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + case 0x1EB8: // LATIN CAPITAL LETTER E WITH DOT BELOW + case 0x1EBA: // LATIN CAPITAL LETTER E WITH HOOK ABOVE + case 0x1EBC: // LATIN CAPITAL LETTER E WITH TILDE + case 0x1EBE: // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + case 0x1EC0: // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + case 0x1EC2: // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EC4: // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + case 0x1EC6: // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + case 0xFF25: // FULLWIDTH LATIN CAPITAL LETTER E + *out = 'E'; return 1; + case 0x0191: // LATIN CAPITAL LETTER F WITH HOOK + case 0x1E1E: // LATIN CAPITAL LETTER F WITH DOT ABOVE + case 0xFF26: // FULLWIDTH LATIN CAPITAL LETTER F + *out = 'F'; return 1; + case 0x011C: // LATIN CAPITAL LETTER G WITH CIRCUMFLEX + case 0x011E: // LATIN CAPITAL LETTER G WITH BREVE + case 0x0120: // LATIN CAPITAL LETTER G WITH DOT ABOVE + case 0x0122: // LATIN CAPITAL LETTER G WITH CEDILLA + case 0x0193: // LATIN CAPITAL LETTER G WITH HOOK + case 0x01E4: // LATIN CAPITAL LETTER G WITH STROKE + case 0x01E6: // LATIN CAPITAL LETTER G WITH CARON + case 0x01F4: // LATIN CAPITAL LETTER G WITH ACUTE + case 0x1E20: // LATIN CAPITAL LETTER G WITH MACRON + case 0xFF27: // FULLWIDTH LATIN CAPITAL LETTER G + *out = 'G'; return 1; + case 0x0124: // LATIN CAPITAL LETTER H WITH CIRCUMFLEX + case 0x0126: // LATIN CAPITAL LETTER H WITH STROKE + case 0x1E22: // LATIN CAPITAL LETTER H WITH DOT ABOVE + case 0x1E24: // LATIN CAPITAL LETTER H WITH DOT BELOW + case 0x1E26: // LATIN CAPITAL LETTER H WITH DIAERESIS + case 0x1E28: // LATIN CAPITAL LETTER H WITH CEDILLA + case 0x1E2A: // LATIN CAPITAL LETTER H 
WITH BREVE BELOW + case 0xFF28: // FULLWIDTH LATIN CAPITAL LETTER H + *out = 'H'; return 1; + case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE + case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE + case 0x00CE: // LATIN CAPITAL LETTER I WITH CIRCUMFLEX + case 0x00CF: // LATIN CAPITAL LETTER I WITH DIAERESIS + case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE + case 0x012A: // LATIN CAPITAL LETTER I WITH MACRON + case 0x012C: // LATIN CAPITAL LETTER I WITH BREVE + case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK + case 0x0130: // LATIN CAPITAL LETTER I WITH DOT ABOVE + case 0x0197: // LATIN CAPITAL LETTER I WITH STROKE + case 0x01CF: // LATIN CAPITAL LETTER I WITH CARON + case 0x0208: // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE + case 0x020A: // LATIN CAPITAL LETTER I WITH INVERTED BREVE + case 0x1E2C: // LATIN CAPITAL LETTER I WITH TILDE BELOW + case 0x1E2E: // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + case 0x1EC8: // LATIN CAPITAL LETTER I WITH HOOK ABOVE + case 0x1ECA: // LATIN CAPITAL LETTER I WITH DOT BELOW + case 0xFF29: // FULLWIDTH LATIN CAPITAL LETTER I + *out = 'I'; return 1; + case 0x0134: // LATIN CAPITAL LETTER J WITH CIRCUMFLEX + case 0xFF2A: // FULLWIDTH LATIN CAPITAL LETTER J + *out = 'J'; return 1; + case 0x0136: // LATIN CAPITAL LETTER K WITH CEDILLA + case 0x0198: // LATIN CAPITAL LETTER K WITH HOOK + case 0x01E8: // LATIN CAPITAL LETTER K WITH CARON + case 0x1E30: // LATIN CAPITAL LETTER K WITH ACUTE + case 0x1E32: // LATIN CAPITAL LETTER K WITH DOT BELOW + case 0x1E34: // LATIN CAPITAL LETTER K WITH LINE BELOW + case 0xFF2B: // FULLWIDTH LATIN CAPITAL LETTER K + *out = 'K'; return 1; + case 0x0139: // LATIN CAPITAL LETTER L WITH ACUTE + case 0x013B: // LATIN CAPITAL LETTER L WITH CEDILLA + case 0x013D: // LATIN CAPITAL LETTER L WITH CARON + case 0x013F: // LATIN CAPITAL LETTER L WITH MIDDLE DOT + case 0x0141: // LATIN CAPITAL LETTER L WITH STROKE + case 0x1E36: // LATIN CAPITAL LETTER L WITH DOT BELOW + case 0x1E38: // LATIN CAPITAL 
LETTER L WITH DOT BELOW AND MACRON + case 0x1E3A: // LATIN CAPITAL LETTER L WITH LINE BELOW + case 0x1E3C: // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + case 0xFF2C: // FULLWIDTH LATIN CAPITAL LETTER L + *out = 'L'; return 1; + case 0x1E3E: // LATIN CAPITAL LETTER M WITH ACUTE + case 0x1E40: // LATIN CAPITAL LETTER M WITH DOT ABOVE + case 0x1E42: // LATIN CAPITAL LETTER M WITH DOT BELOW + case 0xFF2D: // FULLWIDTH LATIN CAPITAL LETTER M + *out = 'M'; return 1; + case 0x00D1: // LATIN CAPITAL LETTER N WITH TILDE + case 0x0143: // LATIN CAPITAL LETTER N WITH ACUTE + case 0x0145: // LATIN CAPITAL LETTER N WITH CEDILLA + case 0x0147: // LATIN CAPITAL LETTER N WITH CARON + case 0x019D: // LATIN CAPITAL LETTER N WITH LEFT HOOK + case 0x1E44: // LATIN CAPITAL LETTER N WITH DOT ABOVE + case 0x1E46: // LATIN CAPITAL LETTER N WITH DOT BELOW + case 0x1E48: // LATIN CAPITAL LETTER N WITH LINE BELOW + case 0x1E4A: // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + case 0xFF2E: // FULLWIDTH LATIN CAPITAL LETTER N + *out = 'N'; return 1; + case 0x00D2: // LATIN CAPITAL LETTER O WITH GRAVE + case 0x00D3: // LATIN CAPITAL LETTER O WITH ACUTE + case 0x00D4: // LATIN CAPITAL LETTER O WITH CIRCUMFLEX + case 0x00D5: // LATIN CAPITAL LETTER O WITH TILDE + case 0x00D6: // LATIN CAPITAL LETTER O WITH DIAERESIS + case 0x00D8: // LATIN CAPITAL LETTER O WITH STROKE + case 0x014C: // LATIN CAPITAL LETTER O WITH MACRON + case 0x014E: // LATIN CAPITAL LETTER O WITH BREVE + case 0x0150: // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + case 0x019F: // LATIN CAPITAL LETTER O WITH MIDDLE TILDE + case 0x01A0: // LATIN CAPITAL LETTER O WITH HORN + case 0x01D1: // LATIN CAPITAL LETTER O WITH CARON + case 0x01EA: // LATIN CAPITAL LETTER O WITH OGONEK + case 0x01EC: // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON + case 0x01FE: // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE + case 0x020C: // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE + case 0x020E: // LATIN CAPITAL LETTER O WITH INVERTED BREVE + case 
0x1E4C: // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE + case 0x1E4E: // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS + case 0x1E50: // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE + case 0x1E52: // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + case 0x1ECC: // LATIN CAPITAL LETTER O WITH DOT BELOW + case 0x1ECE: // LATIN CAPITAL LETTER O WITH HOOK ABOVE + case 0x1ED0: // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + case 0x1ED2: // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + case 0x1ED4: // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1ED6: // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + case 0x1ED8: // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + case 0x1EDA: // LATIN CAPITAL LETTER O WITH HORN AND ACUTE + case 0x1EDC: // LATIN CAPITAL LETTER O WITH HORN AND GRAVE + case 0x1EDE: // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + case 0x1EE0: // LATIN CAPITAL LETTER O WITH HORN AND TILDE + case 0x1EE2: // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + case 0xFF2F: // FULLWIDTH LATIN CAPITAL LETTER O + *out = 'O'; return 1; + case 0x01A4: // LATIN CAPITAL LETTER P WITH HOOK + case 0x1E54: // LATIN CAPITAL LETTER P WITH ACUTE + case 0x1E56: // LATIN CAPITAL LETTER P WITH DOT ABOVE + case 0xFF30: // FULLWIDTH LATIN CAPITAL LETTER P + *out = 'P'; return 1; + case 0xFF31: // FULLWIDTH LATIN CAPITAL LETTER Q + *out = 'Q'; return 1; + case 0x0154: // LATIN CAPITAL LETTER R WITH ACUTE + case 0x0156: // LATIN CAPITAL LETTER R WITH CEDILLA + case 0x0158: // LATIN CAPITAL LETTER R WITH CARON + case 0x0210: // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE + case 0x0212: // LATIN CAPITAL LETTER R WITH INVERTED BREVE + case 0x1E58: // LATIN CAPITAL LETTER R WITH DOT ABOVE + case 0x1E5A: // LATIN CAPITAL LETTER R WITH DOT BELOW + case 0x1E5C: // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + case 0x1E5E: // LATIN CAPITAL LETTER R WITH LINE BELOW + case 0xFF32: // FULLWIDTH LATIN CAPITAL LETTER R + *out = 'R'; return 1; + case 
0x015A: // LATIN CAPITAL LETTER S WITH ACUTE + case 0x015C: // LATIN CAPITAL LETTER S WITH CIRCUMFLEX + case 0x015E: // LATIN CAPITAL LETTER S WITH CEDILLA + case 0x0160: // LATIN CAPITAL LETTER S WITH CARON + case 0x1E60: // LATIN CAPITAL LETTER S WITH DOT ABOVE + case 0x1E62: // LATIN CAPITAL LETTER S WITH DOT BELOW + case 0x1E64: // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + case 0x1E66: // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + case 0x1E68: // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + case 0xFF33: // FULLWIDTH LATIN CAPITAL LETTER S + *out = 'S'; return 1; + case 0x0162: // LATIN CAPITAL LETTER T WITH CEDILLA + case 0x0164: // LATIN CAPITAL LETTER T WITH CARON + case 0x0166: // LATIN CAPITAL LETTER T WITH STROKE + case 0x01AC: // LATIN CAPITAL LETTER T WITH HOOK + case 0x01AE: // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + case 0x1E6A: // LATIN CAPITAL LETTER T WITH DOT ABOVE + case 0x1E6C: // LATIN CAPITAL LETTER T WITH DOT BELOW + case 0x1E6E: // LATIN CAPITAL LETTER T WITH LINE BELOW + case 0x1E70: // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + case 0xFF34: // FULLWIDTH LATIN CAPITAL LETTER T + *out = 'T'; return 1; + case 0x00D9: // LATIN CAPITAL LETTER U WITH GRAVE + case 0x00DA: // LATIN CAPITAL LETTER U WITH ACUTE + case 0x00DB: // LATIN CAPITAL LETTER U WITH CIRCUMFLEX + case 0x00DC: // LATIN CAPITAL LETTER U WITH DIAERESIS + case 0x0168: // LATIN CAPITAL LETTER U WITH TILDE + case 0x016A: // LATIN CAPITAL LETTER U WITH MACRON + case 0x016C: // LATIN CAPITAL LETTER U WITH BREVE + case 0x016E: // LATIN CAPITAL LETTER U WITH RING ABOVE + case 0x0170: // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + case 0x0172: // LATIN CAPITAL LETTER U WITH OGONEK + case 0x01AF: // LATIN CAPITAL LETTER U WITH HORN + case 0x01D3: // LATIN CAPITAL LETTER U WITH CARON + case 0x01D5: // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON + case 0x01D7: // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE + case 0x01D9: // LATIN CAPITAL LETTER U WITH 
DIAERESIS AND CARON + case 0x01DB: // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE + case 0x0214: // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE + case 0x0216: // LATIN CAPITAL LETTER U WITH INVERTED BREVE + case 0x1E72: // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + case 0x1E74: // LATIN CAPITAL LETTER U WITH TILDE BELOW + case 0x1E76: // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + case 0x1E78: // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + case 0x1E7A: // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS + case 0x1EE4: // LATIN CAPITAL LETTER U WITH DOT BELOW + case 0x1EE6: // LATIN CAPITAL LETTER U WITH HOOK ABOVE + case 0x1EE8: // LATIN CAPITAL LETTER U WITH HORN AND ACUTE + case 0x1EEA: // LATIN CAPITAL LETTER U WITH HORN AND GRAVE + case 0x1EEC: // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE + case 0x1EEE: // LATIN CAPITAL LETTER U WITH HORN AND TILDE + case 0x1EF0: // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + case 0xFF35: // FULLWIDTH LATIN CAPITAL LETTER U + *out = 'U'; return 1; + case 0x01B2: // LATIN CAPITAL LETTER V WITH HOOK + case 0x1E7C: // LATIN CAPITAL LETTER V WITH TILDE + case 0x1E7E: // LATIN CAPITAL LETTER V WITH DOT BELOW + case 0xFF36: // FULLWIDTH LATIN CAPITAL LETTER V + *out = 'V'; return 1; + case 0x0174: // LATIN CAPITAL LETTER W WITH CIRCUMFLEX + case 0x1E80: // LATIN CAPITAL LETTER W WITH GRAVE + case 0x1E82: // LATIN CAPITAL LETTER W WITH ACUTE + case 0x1E84: // LATIN CAPITAL LETTER W WITH DIAERESIS + case 0x1E86: // LATIN CAPITAL LETTER W WITH DOT ABOVE + case 0x1E88: // LATIN CAPITAL LETTER W WITH DOT BELOW + case 0xFF37: // FULLWIDTH LATIN CAPITAL LETTER W + *out = 'W'; return 1; + case 0x1E8A: // LATIN CAPITAL LETTER X WITH DOT ABOVE + case 0x1E8C: // LATIN CAPITAL LETTER X WITH DIAERESIS + case 0xFF38: // FULLWIDTH LATIN CAPITAL LETTER X + *out = 'X'; return 1; + case 0x00DD: // LATIN CAPITAL LETTER Y WITH ACUTE + case 0x0176: // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + case 0x0178: // LATIN CAPITAL LETTER Y WITH 
DIAERESIS + case 0x01B3: // LATIN CAPITAL LETTER Y WITH HOOK + case 0x1E8E: // LATIN CAPITAL LETTER Y WITH DOT ABOVE + case 0x1EF2: // LATIN CAPITAL LETTER Y WITH GRAVE + case 0x1EF4: // LATIN CAPITAL LETTER Y WITH DOT BELOW + case 0x1EF6: // LATIN CAPITAL LETTER Y WITH HOOK ABOVE + case 0x1EF8: // LATIN CAPITAL LETTER Y WITH TILDE + case 0xFF39: // FULLWIDTH LATIN CAPITAL LETTER Y + *out = 'Y'; return 1; + case 0x0179: // LATIN CAPITAL LETTER Z WITH ACUTE + case 0x017B: // LATIN CAPITAL LETTER Z WITH DOT ABOVE + case 0x017D: // LATIN CAPITAL LETTER Z WITH CARON + *out = 'Z'; return 1; + case 0x00E0: // LATIN SMALL LETTER A WITH GRAVE + case 0x00E1: // LATIN SMALL LETTER A WITH ACUTE + case 0x00E2: // LATIN SMALL LETTER A WITH CIRCUMFLEX + case 0x00E3: // LATIN SMALL LETTER A WITH TILDE + case 0x00E4: // LATIN SMALL LETTER A WITH DIAERESIS + case 0x00E5: // LATIN SMALL LETTER A WITH RING ABOVE + case 0x0101: // LATIN SMALL LETTER A WITH MACRON + case 0x0103: // LATIN SMALL LETTER A WITH BREVE + case 0x0105: // LATIN SMALL LETTER A WITH OGONEK + case 0x01CE: // LATIN SMALL LETTER A WITH CARON + case 0x01DF: // LATIN SMALL LETTER A WITH DIAERESIS AND MACRON + case 0x01E1: // LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON + case 0x01FB: // LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE + case 0x0201: // LATIN SMALL LETTER A WITH DOUBLE GRAVE + case 0x0203: // LATIN SMALL LETTER A WITH INVERTED BREVE + case 0x1E01: // LATIN SMALL LETTER A WITH RING BELOW + case 0x1E9A: // LATIN SMALL LETTER A WITH RIGHT HALF RING + case 0x1EA1: // LATIN SMALL LETTER A WITH DOT BELOW + case 0x1EA3: // LATIN SMALL LETTER A WITH HOOK ABOVE + case 0x1EA5: // LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE + case 0x1EA7: // LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE + case 0x1EA9: // LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EAB: // LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE + case 0x1EAD: // LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW + case 0x1EAF: // 
LATIN SMALL LETTER A WITH BREVE AND ACUTE + case 0x1EB1: // LATIN SMALL LETTER A WITH BREVE AND GRAVE + case 0x1EB3: // LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE + case 0x1EB5: // LATIN SMALL LETTER A WITH BREVE AND TILDE + case 0x1EB7: // LATIN SMALL LETTER A WITH BREVE AND DOT BELOW + case 0xFF41: // FULLWIDTH LATIN SMALL LETTER A + *out = 'a'; return 1; + case 0x0180: // LATIN SMALL LETTER B WITH STROKE + case 0x0183: // LATIN SMALL LETTER B WITH TOPBAR + case 0x0253: // LATIN SMALL LETTER B WITH HOOK + case 0x1E03: // LATIN SMALL LETTER B WITH DOT ABOVE + case 0x1E05: // LATIN SMALL LETTER B WITH DOT BELOW + case 0x1E07: // LATIN SMALL LETTER B WITH LINE BELOW + case 0xFF42: // FULLWIDTH LATIN SMALL LETTER B + *out = 'b'; return 1; + case 0x00E7: // LATIN SMALL LETTER C WITH CEDILLA + case 0x0107: // LATIN SMALL LETTER C WITH ACUTE + case 0x0109: // LATIN SMALL LETTER C WITH CIRCUMFLEX + case 0x010B: // LATIN SMALL LETTER C WITH DOT ABOVE + case 0x010D: // LATIN SMALL LETTER C WITH CARON + case 0x0188: // LATIN SMALL LETTER C WITH HOOK + case 0x0255: // LATIN SMALL LETTER C WITH CURL + case 0x1E09: // LATIN SMALL LETTER C WITH CEDILLA AND ACUTE + case 0xFF43: // FULLWIDTH LATIN SMALL LETTER C + *out = 'c'; return 1; + case 0x010F: // LATIN SMALL LETTER D WITH CARON + case 0x0111: // LATIN SMALL LETTER D WITH STROKE + case 0x018C: // LATIN SMALL LETTER D WITH TOPBAR + case 0x0256: // LATIN SMALL LETTER D WITH TAIL + case 0x0257: // LATIN SMALL LETTER D WITH HOOK + case 0x1E0B: // LATIN SMALL LETTER D WITH DOT ABOVE + case 0x1E0D: // LATIN SMALL LETTER D WITH DOT BELOW + case 0x1E0F: // LATIN SMALL LETTER D WITH LINE BELOW + case 0x1E11: // LATIN SMALL LETTER D WITH CEDILLA + case 0x1E13: // LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW + case 0xFF44: // FULLWIDTH LATIN SMALL LETTER D + *out = 'd'; return 1; + case 0x00E8: // LATIN SMALL LETTER E WITH GRAVE + case 0x00E9: // LATIN SMALL LETTER E WITH ACUTE + case 0x00EA: // LATIN SMALL LETTER E WITH CIRCUMFLEX 
+ case 0x00EB: // LATIN SMALL LETTER E WITH DIAERESIS + case 0x0113: // LATIN SMALL LETTER E WITH MACRON + case 0x0115: // LATIN SMALL LETTER E WITH BREVE + case 0x0117: // LATIN SMALL LETTER E WITH DOT ABOVE + case 0x0119: // LATIN SMALL LETTER E WITH OGONEK + case 0x011B: // LATIN SMALL LETTER E WITH CARON + case 0x0205: // LATIN SMALL LETTER E WITH DOUBLE GRAVE + case 0x0207: // LATIN SMALL LETTER E WITH INVERTED BREVE + case 0x1E15: // LATIN SMALL LETTER E WITH MACRON AND GRAVE + case 0x1E17: // LATIN SMALL LETTER E WITH MACRON AND ACUTE + case 0x1E19: // LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW + case 0x1E1B: // LATIN SMALL LETTER E WITH TILDE BELOW + case 0x1E1D: // LATIN SMALL LETTER E WITH CEDILLA AND BREVE + case 0x1EB9: // LATIN SMALL LETTER E WITH DOT BELOW + case 0x1EBB: // LATIN SMALL LETTER E WITH HOOK ABOVE + case 0x1EBD: // LATIN SMALL LETTER E WITH TILDE + case 0x1EBF: // LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE + case 0x1EC1: // LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE + case 0x1EC3: // LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EC5: // LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE + case 0x1EC7: // LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW + case 0xFF45: // FULLWIDTH LATIN SMALL LETTER E + *out = 'e'; return 1; + case 0x0192: // LATIN SMALL LETTER F WITH HOOK + case 0x1E1F: // LATIN SMALL LETTER F WITH DOT ABOVE + case 0xFF46: // FULLWIDTH LATIN SMALL LETTER F + *out = 'f'; return 1; + case 0x011D: // LATIN SMALL LETTER G WITH CIRCUMFLEX + case 0x011F: // LATIN SMALL LETTER G WITH BREVE + case 0x0121: // LATIN SMALL LETTER G WITH DOT ABOVE + case 0x0123: // LATIN SMALL LETTER G WITH CEDILLA + case 0x01E5: // LATIN SMALL LETTER G WITH STROKE + case 0x01E7: // LATIN SMALL LETTER G WITH CARON + case 0x01F5: // LATIN SMALL LETTER G WITH ACUTE + case 0x0260: // LATIN SMALL LETTER G WITH HOOK + case 0x1E21: // LATIN SMALL LETTER G WITH MACRON + case 0xFF47: // FULLWIDTH LATIN SMALL LETTER G + *out = 'g'; 
return 1; + case 0x0125: // LATIN SMALL LETTER H WITH CIRCUMFLEX + case 0x0127: // LATIN SMALL LETTER H WITH STROKE + case 0x0266: // LATIN SMALL LETTER H WITH HOOK + case 0x1E23: // LATIN SMALL LETTER H WITH DOT ABOVE + case 0x1E25: // LATIN SMALL LETTER H WITH DOT BELOW + case 0x1E27: // LATIN SMALL LETTER H WITH DIAERESIS + case 0x1E29: // LATIN SMALL LETTER H WITH CEDILLA + case 0x1E2B: // LATIN SMALL LETTER H WITH BREVE BELOW + case 0x1E96: // LATIN SMALL LETTER H WITH LINE BELOW + case 0xFF48: // FULLWIDTH LATIN SMALL LETTER H + *out = 'h'; return 1; + case 0x00EC: // LATIN SMALL LETTER I WITH GRAVE + case 0x00ED: // LATIN SMALL LETTER I WITH ACUTE + case 0x00EE: // LATIN SMALL LETTER I WITH CIRCUMFLEX + case 0x00EF: // LATIN SMALL LETTER I WITH DIAERESIS + case 0x0129: // LATIN SMALL LETTER I WITH TILDE + case 0x012B: // LATIN SMALL LETTER I WITH MACRON + case 0x012D: // LATIN SMALL LETTER I WITH BREVE + case 0x012F: // LATIN SMALL LETTER I WITH OGONEK + case 0x01D0: // LATIN SMALL LETTER I WITH CARON + case 0x0209: // LATIN SMALL LETTER I WITH DOUBLE GRAVE + case 0x020B: // LATIN SMALL LETTER I WITH INVERTED BREVE + case 0x0268: // LATIN SMALL LETTER I WITH STROKE + case 0x1E2D: // LATIN SMALL LETTER I WITH TILDE BELOW + case 0x1E2F: // LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE + case 0x1EC9: // LATIN SMALL LETTER I WITH HOOK ABOVE + case 0x1ECB: // LATIN SMALL LETTER I WITH DOT BELOW + case 0xFF49: // FULLWIDTH LATIN SMALL LETTER I + *out = 'i'; return 1; + case 0x0135: // LATIN SMALL LETTER J WITH CIRCUMFLEX + case 0x01F0: // LATIN SMALL LETTER J WITH CARON + case 0x029D: // LATIN SMALL LETTER J WITH CROSSED-TAIL + case 0xFF4A: // FULLWIDTH LATIN SMALL LETTER J + *out = 'j'; return 1; + case 0x0137: // LATIN SMALL LETTER K WITH CEDILLA + case 0x0199: // LATIN SMALL LETTER K WITH HOOK + case 0x01E9: // LATIN SMALL LETTER K WITH CARON + case 0x1E31: // LATIN SMALL LETTER K WITH ACUTE + case 0x1E33: // LATIN SMALL LETTER K WITH DOT BELOW + case 0x1E35: 
// LATIN SMALL LETTER K WITH LINE BELOW + case 0xFF4B: // FULLWIDTH LATIN SMALL LETTER K + *out = 'k'; return 1; + case 0x013A: // LATIN SMALL LETTER L WITH ACUTE + case 0x013C: // LATIN SMALL LETTER L WITH CEDILLA + case 0x013E: // LATIN SMALL LETTER L WITH CARON + case 0x0140: // LATIN SMALL LETTER L WITH MIDDLE DOT + case 0x0142: // LATIN SMALL LETTER L WITH STROKE + case 0x019A: // LATIN SMALL LETTER L WITH BAR + case 0x026B: // LATIN SMALL LETTER L WITH MIDDLE TILDE + case 0x026C: // LATIN SMALL LETTER L WITH BELT + case 0x026D: // LATIN SMALL LETTER L WITH RETROFLEX HOOK + case 0x1E37: // LATIN SMALL LETTER L WITH DOT BELOW + case 0x1E39: // LATIN SMALL LETTER L WITH DOT BELOW AND MACRON + case 0x1E3B: // LATIN SMALL LETTER L WITH LINE BELOW + case 0x1E3D: // LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW + case 0xFF4C: // FULLWIDTH LATIN SMALL LETTER L + *out = 'l'; return 1; + case 0x0271: // LATIN SMALL LETTER M WITH HOOK + case 0x1E3F: // LATIN SMALL LETTER M WITH ACUTE + case 0x1E41: // LATIN SMALL LETTER M WITH DOT ABOVE + case 0x1E43: // LATIN SMALL LETTER M WITH DOT BELOW + case 0xFF4D: // FULLWIDTH LATIN SMALL LETTER M + *out = 'm'; return 1; + case 0x00F1: // LATIN SMALL LETTER N WITH TILDE + case 0x0144: // LATIN SMALL LETTER N WITH ACUTE + case 0x0146: // LATIN SMALL LETTER N WITH CEDILLA + case 0x0148: // LATIN SMALL LETTER N WITH CARON + case 0x019E: // LATIN SMALL LETTER N WITH LONG RIGHT LEG + case 0x0272: // LATIN SMALL LETTER N WITH LEFT HOOK + case 0x0273: // LATIN SMALL LETTER N WITH RETROFLEX HOOK + case 0x1E45: // LATIN SMALL LETTER N WITH DOT ABOVE + case 0x1E47: // LATIN SMALL LETTER N WITH DOT BELOW + case 0x1E49: // LATIN SMALL LETTER N WITH LINE BELOW + case 0x1E4B: // LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW + case 0xFF4E: // FULLWIDTH LATIN SMALL LETTER N + *out = 'n'; return 1; + case 0x00F2: // LATIN SMALL LETTER O WITH GRAVE + case 0x00F3: // LATIN SMALL LETTER O WITH ACUTE + case 0x00F4: // LATIN SMALL LETTER O WITH CIRCUMFLEX + 
case 0x00F5: // LATIN SMALL LETTER O WITH TILDE + case 0x00F6: // LATIN SMALL LETTER O WITH DIAERESIS + case 0x00F8: // LATIN SMALL LETTER O WITH STROKE + case 0x014D: // LATIN SMALL LETTER O WITH MACRON + case 0x014F: // LATIN SMALL LETTER O WITH BREVE + case 0x0151: // LATIN SMALL LETTER O WITH DOUBLE ACUTE + case 0x01A1: // LATIN SMALL LETTER O WITH HORN + case 0x01D2: // LATIN SMALL LETTER O WITH CARON + case 0x01EB: // LATIN SMALL LETTER O WITH OGONEK + case 0x01ED: // LATIN SMALL LETTER O WITH OGONEK AND MACRON + case 0x01FF: // LATIN SMALL LETTER O WITH STROKE AND ACUTE + case 0x020D: // LATIN SMALL LETTER O WITH DOUBLE GRAVE + case 0x020F: // LATIN SMALL LETTER O WITH INVERTED BREVE + case 0x1E4D: // LATIN SMALL LETTER O WITH TILDE AND ACUTE + case 0x1E4F: // LATIN SMALL LETTER O WITH TILDE AND DIAERESIS + case 0x1E51: // LATIN SMALL LETTER O WITH MACRON AND GRAVE + case 0x1E53: // LATIN SMALL LETTER O WITH MACRON AND ACUTE + case 0x1ECD: // LATIN SMALL LETTER O WITH DOT BELOW + case 0x1ECF: // LATIN SMALL LETTER O WITH HOOK ABOVE + case 0x1ED1: // LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE + case 0x1ED3: // LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE + case 0x1ED5: // LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1ED7: // LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE + case 0x1ED9: // LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW + case 0x1EDB: // LATIN SMALL LETTER O WITH HORN AND ACUTE + case 0x1EDD: // LATIN SMALL LETTER O WITH HORN AND GRAVE + case 0x1EDF: // LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE + case 0x1EE1: // LATIN SMALL LETTER O WITH HORN AND TILDE + case 0x1EE3: // LATIN SMALL LETTER O WITH HORN AND DOT BELOW + case 0xFF4F: // FULLWIDTH LATIN SMALL LETTER O + *out = 'o'; return 1; + case 0x01A5: // LATIN SMALL LETTER P WITH HOOK + case 0x1E55: // LATIN SMALL LETTER P WITH ACUTE + case 0x1E57: // LATIN SMALL LETTER P WITH DOT ABOVE + case 0xFF50: // FULLWIDTH LATIN SMALL LETTER P + *out = 'p'; return 1; + case 
0x02A0: // LATIN SMALL LETTER Q WITH HOOK + case 0xFF51: // FULLWIDTH LATIN SMALL LETTER Q + *out = 'q'; return 1; + case 0x0155: // LATIN SMALL LETTER R WITH ACUTE + case 0x0157: // LATIN SMALL LETTER R WITH CEDILLA + case 0x0159: // LATIN SMALL LETTER R WITH CARON + case 0x0211: // LATIN SMALL LETTER R WITH DOUBLE GRAVE + case 0x0213: // LATIN SMALL LETTER R WITH INVERTED BREVE + case 0x027C: // LATIN SMALL LETTER R WITH LONG LEG + case 0x027D: // LATIN SMALL LETTER R WITH TAIL + case 0x027E: // LATIN SMALL LETTER R WITH FISHHOOK + case 0x1E59: // LATIN SMALL LETTER R WITH DOT ABOVE + case 0x1E5B: // LATIN SMALL LETTER R WITH DOT BELOW + case 0x1E5D: // LATIN SMALL LETTER R WITH DOT BELOW AND MACRON + case 0x1E5F: // LATIN SMALL LETTER R WITH LINE BELOW + case 0xFF52: // FULLWIDTH LATIN SMALL LETTER R + *out = 'r'; return 1; + case 0x015B: // LATIN SMALL LETTER S WITH ACUTE + case 0x015D: // LATIN SMALL LETTER S WITH CIRCUMFLEX + case 0x015F: // LATIN SMALL LETTER S WITH CEDILLA + case 0x0161: // LATIN SMALL LETTER S WITH CARON + case 0x0282: // LATIN SMALL LETTER S WITH HOOK + case 0x1E61: // LATIN SMALL LETTER S WITH DOT ABOVE + case 0x1E63: // LATIN SMALL LETTER S WITH DOT BELOW + case 0x1E65: // LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE + case 0x1E67: // LATIN SMALL LETTER S WITH CARON AND DOT ABOVE + case 0x1E69: // LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE + case 0xFF53: // FULLWIDTH LATIN SMALL LETTER S + *out = 's'; return 1; + case 0x0163: // LATIN SMALL LETTER T WITH CEDILLA + case 0x0165: // LATIN SMALL LETTER T WITH CARON + case 0x0167: // LATIN SMALL LETTER T WITH STROKE + case 0x01AB: // LATIN SMALL LETTER T WITH PALATAL HOOK + case 0x01AD: // LATIN SMALL LETTER T WITH HOOK + case 0x0288: // LATIN SMALL LETTER T WITH RETROFLEX HOOK + case 0x1E6B: // LATIN SMALL LETTER T WITH DOT ABOVE + case 0x1E6D: // LATIN SMALL LETTER T WITH DOT BELOW + case 0x1E6F: // LATIN SMALL LETTER T WITH LINE BELOW + case 0x1E71: // LATIN SMALL LETTER T WITH 
CIRCUMFLEX BELOW + case 0x1E97: // LATIN SMALL LETTER T WITH DIAERESIS + case 0xFF54: // FULLWIDTH LATIN SMALL LETTER T + *out = 't'; return 1; + case 0x00F9: // LATIN SMALL LETTER U WITH GRAVE + case 0x00FA: // LATIN SMALL LETTER U WITH ACUTE + case 0x00FB: // LATIN SMALL LETTER U WITH CIRCUMFLEX + case 0x00FC: // LATIN SMALL LETTER U WITH DIAERESIS + case 0x0169: // LATIN SMALL LETTER U WITH TILDE + case 0x016B: // LATIN SMALL LETTER U WITH MACRON + case 0x016D: // LATIN SMALL LETTER U WITH BREVE + case 0x016F: // LATIN SMALL LETTER U WITH RING ABOVE + case 0x0171: // LATIN SMALL LETTER U WITH DOUBLE ACUTE + case 0x0173: // LATIN SMALL LETTER U WITH OGONEK + case 0x01B0: // LATIN SMALL LETTER U WITH HORN + case 0x01D4: // LATIN SMALL LETTER U WITH CARON + case 0x01D6: // LATIN SMALL LETTER U WITH DIAERESIS AND MACRON + case 0x01D8: // LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE + case 0x01DA: // LATIN SMALL LETTER U WITH DIAERESIS AND CARON + case 0x01DC: // LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE + case 0x0215: // LATIN SMALL LETTER U WITH DOUBLE GRAVE + case 0x0217: // LATIN SMALL LETTER U WITH INVERTED BREVE + case 0x0289: // LATIN SMALL LETTER U BAR + case 0x1E73: // LATIN SMALL LETTER U WITH DIAERESIS BELOW + case 0x1E75: // LATIN SMALL LETTER U WITH TILDE BELOW + case 0x1E77: // LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW + case 0x1E79: // LATIN SMALL LETTER U WITH TILDE AND ACUTE + case 0x1E7B: // LATIN SMALL LETTER U WITH MACRON AND DIAERESIS + case 0x1EE5: // LATIN SMALL LETTER U WITH DOT BELOW + case 0x1EE7: // LATIN SMALL LETTER U WITH HOOK ABOVE + case 0x1EE9: // LATIN SMALL LETTER U WITH HORN AND ACUTE + case 0x1EEB: // LATIN SMALL LETTER U WITH HORN AND GRAVE + case 0x1EED: // LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE + case 0x1EEF: // LATIN SMALL LETTER U WITH HORN AND TILDE + case 0x1EF1: // LATIN SMALL LETTER U WITH HORN AND DOT BELOW + case 0xFF55: // FULLWIDTH LATIN SMALL LETTER U + *out = 'u'; return 1; + case 0x028B: // LATIN SMALL 
LETTER V WITH HOOK + case 0x1E7D: // LATIN SMALL LETTER V WITH TILDE + case 0x1E7F: // LATIN SMALL LETTER V WITH DOT BELOW + case 0xFF56: // FULLWIDTH LATIN SMALL LETTER V + *out = 'v'; return 1; + case 0x0175: // LATIN SMALL LETTER W WITH CIRCUMFLEX + case 0x1E81: // LATIN SMALL LETTER W WITH GRAVE + case 0x1E83: // LATIN SMALL LETTER W WITH ACUTE + case 0x1E85: // LATIN SMALL LETTER W WITH DIAERESIS + case 0x1E87: // LATIN SMALL LETTER W WITH DOT ABOVE + case 0x1E89: // LATIN SMALL LETTER W WITH DOT BELOW + case 0x1E98: // LATIN SMALL LETTER W WITH RING ABOVE + case 0xFF57: // FULLWIDTH LATIN SMALL LETTER W + *out = 'w'; return 1; + case 0x1E8B: // LATIN SMALL LETTER X WITH DOT ABOVE + case 0x1E8D: // LATIN SMALL LETTER X WITH DIAERESIS + case 0xFF58: // FULLWIDTH LATIN SMALL LETTER X + *out = 'x'; return 1; + case 0x00FD: // LATIN SMALL LETTER Y WITH ACUTE + case 0x00FF: // LATIN SMALL LETTER Y WITH DIAERESIS + case 0x0177: // LATIN SMALL LETTER Y WITH CIRCUMFLEX + case 0x01B4: // LATIN SMALL LETTER Y WITH HOOK + case 0x1E8F: // LATIN SMALL LETTER Y WITH DOT ABOVE + case 0x1E99: // LATIN SMALL LETTER Y WITH RING ABOVE + case 0x1EF3: // LATIN SMALL LETTER Y WITH GRAVE + case 0x1EF5: // LATIN SMALL LETTER Y WITH DOT BELOW + case 0x1EF7: // LATIN SMALL LETTER Y WITH HOOK ABOVE + case 0x1EF9: // LATIN SMALL LETTER Y WITH TILDE + case 0xFF59: // FULLWIDTH LATIN SMALL LETTER Y + *out = 'y'; return 1; + case 0x017A: // LATIN SMALL LETTER Z WITH ACUTE + case 0x017C: // LATIN SMALL LETTER Z WITH DOT ABOVE + case 0x017E: // LATIN SMALL LETTER Z WITH CARON + case 0x01B6: // LATIN SMALL LETTER Z WITH STROKE + case 0x0290: // LATIN SMALL LETTER Z WITH RETROFLEX HOOK + case 0x0291: // LATIN SMALL LETTER Z WITH CURL + case 0x1E91: // LATIN SMALL LETTER Z WITH CIRCUMFLEX + case 0x1E93: // LATIN SMALL LETTER Z WITH DOT BELOW + case 0x1E95: // LATIN SMALL LETTER Z WITH LINE BELOW + case 0xFF5A: // FULLWIDTH LATIN SMALL LETTER Z + *out = 'z'; return 1; + case 0x0660: // 
ARABIC-INDIC DIGIT ZERO + case 0x06F0: // EXTENDED ARABIC-INDIC DIGIT ZERO + case 0x0966: // DEVANAGARI DIGIT ZERO + case 0x09E6: // BENGALI DIGIT ZERO + case 0x0A66: // GURMUKHI DIGIT ZERO + case 0x0AE6: // GUJARATI DIGIT ZERO + case 0x0B66: // ORIYA DIGIT ZERO + case 0x0BE6: // TAMIL DIGIT ZERO + case 0x0C66: // TELUGU DIGIT ZERO + case 0x0CE6: // KANNADA DIGIT ZERO + case 0x0D66: // MALAYALAM DIGIT ZERO + case 0x0E50: // THAI DIGIT ZERO + case 0x0ED0: // LAO DIGIT ZERO + case 0x0F20: // TIBETAN DIGIT ZERO + case 0xFF10: // FULLWIDTH DIGIT ZERO + *out = '0'; return 1; + case 0x0661: // ARABIC-INDIC DIGIT ONE + case 0x06F1: // EXTENDED ARABIC-INDIC DIGIT ONE + case 0x0967: // DEVANAGARI DIGIT ONE + case 0x09E7: // BENGALI DIGIT ONE + case 0x0A67: // GURMUKHI DIGIT ONE + case 0x0AE7: // GUJARATI DIGIT ONE + case 0x0B67: // ORIYA DIGIT ONE + case 0x0BE7: // TAMIL DIGIT ONE + case 0x0C67: // TELUGU DIGIT ONE + case 0x0CE7: // KANNADA DIGIT ONE + case 0x0D67: // MALAYALAM DIGIT ONE + case 0x0E51: // THAI DIGIT ONE + case 0x0ED1: // LAO DIGIT ONE + case 0x0F21: // TIBETAN DIGIT ONE + case 0xFF11: // FULLWIDTH DIGIT ONE + *out = '1'; return 1; + case 0x0662: // ARABIC-INDIC DIGIT TWO + case 0x06F2: // EXTENDED ARABIC-INDIC DIGIT TWO + case 0x0968: // DEVANAGARI DIGIT TWO + case 0x09E8: // BENGALI DIGIT TWO + case 0x0A68: // GURMUKHI DIGIT TWO + case 0x0AE8: // GUJARATI DIGIT TWO + case 0x0B68: // ORIYA DIGIT TWO + case 0x0BE8: // TAMIL DIGIT TWO + case 0x0C68: // TELUGU DIGIT TWO + case 0x0CE8: // KANNADA DIGIT TWO + case 0x0D68: // MALAYALAM DIGIT TWO + case 0x0E52: // THAI DIGIT TWO + case 0x0ED2: // LAO DIGIT TWO + case 0x0F22: // TIBETAN DIGIT TWO + case 0xFF12: // FULLWIDTH DIGIT TWO + *out = '2'; return 1; + case 0x0663: // ARABIC-INDIC DIGIT THREE + case 0x06F3: // EXTENDED ARABIC-INDIC DIGIT THREE + case 0x0969: // DEVANAGARI DIGIT THREE + case 0x09E9: // BENGALI DIGIT THREE + case 0x0A69: // GURMUKHI DIGIT THREE + case 0x0AE9: // GUJARATI DIGIT THREE + case 
0x0B69: // ORIYA DIGIT THREE + case 0x0BE9: // TAMIL DIGIT THREE + case 0x0C69: // TELUGU DIGIT THREE + case 0x0CE9: // KANNADA DIGIT THREE + case 0x0D69: // MALAYALAM DIGIT THREE + case 0x0E53: // THAI DIGIT THREE + case 0x0ED3: // LAO DIGIT THREE + case 0x0F23: // TIBETAN DIGIT THREE + case 0xFF13: // FULLWIDTH DIGIT THREE + *out = '3'; return 1; + case 0x0664: // ARABIC-INDIC DIGIT FOUR + case 0x06F4: // EXTENDED ARABIC-INDIC DIGIT FOUR + case 0x096A: // DEVANAGARI DIGIT FOUR + case 0x09EA: // BENGALI DIGIT FOUR + case 0x0A6A: // GURMUKHI DIGIT FOUR + case 0x0AEA: // GUJARATI DIGIT FOUR + case 0x0B6A: // ORIYA DIGIT FOUR + case 0x0BEA: // TAMIL DIGIT FOUR + case 0x0C6A: // TELUGU DIGIT FOUR + case 0x0CEA: // KANNADA DIGIT FOUR + case 0x0D6A: // MALAYALAM DIGIT FOUR + case 0x0E54: // THAI DIGIT FOUR + case 0x0ED4: // LAO DIGIT FOUR + case 0x0F24: // TIBETAN DIGIT FOUR + case 0xFF14: // FULLWIDTH DIGIT FOUR + *out = '4'; return 1; + case 0x0665: // ARABIC-INDIC DIGIT FIVE + case 0x06F5: // EXTENDED ARABIC-INDIC DIGIT FIVE + case 0x096B: // DEVANAGARI DIGIT FIVE + case 0x09EB: // BENGALI DIGIT FIVE + case 0x0A6B: // GURMUKHI DIGIT FIVE + case 0x0AEB: // GUJARATI DIGIT FIVE + case 0x0B6B: // ORIYA DIGIT FIVE + case 0x0BEB: // TAMIL DIGIT FIVE + case 0x0C6B: // TELUGU DIGIT FIVE + case 0x0CEB: // KANNADA DIGIT FIVE + case 0x0D6B: // MALAYALAM DIGIT FIVE + case 0x0E55: // THAI DIGIT FIVE + case 0x0ED5: // LAO DIGIT FIVE + case 0x0F25: // TIBETAN DIGIT FIVE + case 0xFF15: // FULLWIDTH DIGIT FIVE + *out = '5'; return 1; + case 0x0666: // ARABIC-INDIC DIGIT SIX + case 0x06F6: // EXTENDED ARABIC-INDIC DIGIT SIX + case 0x096C: // DEVANAGARI DIGIT SIX + case 0x09EC: // BENGALI DIGIT SIX + case 0x0A6C: // GURMUKHI DIGIT SIX + case 0x0AEC: // GUJARATI DIGIT SIX + case 0x0B6C: // ORIYA DIGIT SIX + case 0x0BEC: // TAMIL DIGIT SIX + case 0x0C6C: // TELUGU DIGIT SIX + case 0x0CEC: // KANNADA DIGIT SIX + case 0x0D6C: // MALAYALAM DIGIT SIX + case 0x0E56: // THAI DIGIT SIX + case 
0x0ED6: // LAO DIGIT SIX + case 0x0F26: // TIBETAN DIGIT SIX + case 0xFF16: // FULLWIDTH DIGIT SIX + *out = '6'; return 1; + case 0x0667: // ARABIC-INDIC DIGIT SEVEN + case 0x06F7: // EXTENDED ARABIC-INDIC DIGIT SEVEN + case 0x096D: // DEVANAGARI DIGIT SEVEN + case 0x09ED: // BENGALI DIGIT SEVEN + case 0x0A6D: // GURMUKHI DIGIT SEVEN + case 0x0AED: // GUJARATI DIGIT SEVEN + case 0x0B6D: // ORIYA DIGIT SEVEN + case 0x0BED: // TAMIL DIGIT SEVEN + case 0x0C6D: // TELUGU DIGIT SEVEN + case 0x0CED: // KANNADA DIGIT SEVEN + case 0x0D6D: // MALAYALAM DIGIT SEVEN + case 0x0E57: // THAI DIGIT SEVEN + case 0x0ED7: // LAO DIGIT SEVEN + case 0x0F27: // TIBETAN DIGIT SEVEN + case 0xFF17: // FULLWIDTH DIGIT SEVEN + *out = '7'; return 1; + case 0x0668: // ARABIC-INDIC DIGIT EIGHT + case 0x06F8: // EXTENDED ARABIC-INDIC DIGIT EIGHT + case 0x096E: // DEVANAGARI DIGIT EIGHT + case 0x09EE: // BENGALI DIGIT EIGHT + case 0x0A6E: // GURMUKHI DIGIT EIGHT + case 0x0AEE: // GUJARATI DIGIT EIGHT + case 0x0B6E: // ORIYA DIGIT EIGHT + case 0x0BEE: // TAMIL DIGIT EIGHT + case 0x0C6E: // TELUGU DIGIT EIGHT + case 0x0CEE: // KANNADA DIGIT EIGHT + case 0x0D6E: // MALAYALAM DIGIT EIGHT + case 0x0E58: // THAI DIGIT EIGHT + case 0x0ED8: // LAO DIGIT EIGHT + case 0x0F28: // TIBETAN DIGIT EIGHT + case 0xFF18: // FULLWIDTH DIGIT EIGHT + *out = '8'; return 1; + case 0x0669: // ARABIC-INDIC DIGIT NINE + case 0x06F9: // EXTENDED ARABIC-INDIC DIGIT NINE + case 0x096F: // DEVANAGARI DIGIT NINE + case 0x09EF: // BENGALI DIGIT NINE + case 0x0A6F: // GURMUKHI DIGIT NINE + case 0x0AEF: // GUJARATI DIGIT NINE + case 0x0B6F: // ORIYA DIGIT NINE + case 0x0BEF: // TAMIL DIGIT NINE + case 0x0C6F: // TELUGU DIGIT NINE + case 0x0CEF: // KANNADA DIGIT NINE + case 0x0D6F: // MALAYALAM DIGIT NINE + case 0x0E59: // THAI DIGIT NINE + case 0x0ED9: // LAO DIGIT NINE + case 0x0F29: // TIBETAN DIGIT NINE + case 0xFF19: // FULLWIDTH DIGIT NINE + *out = '9'; return 1; + case 0x00A1: // INVERTED EXCLAMATION MARK + *out = '!'; 
return 1; + case 0x00A6: // BROKEN BAR + *out = '|'; return 1; + case 0x00AD: // SOFT HYPHEN + case 0x02D7: // MODIFIER LETTER MINUS SIGN + case 0x2010: // HYPHEN + case 0x2011: // NON-BREAKING HYPHEN + case 0x2212: // MINUS SIGN + case 0xFE63: // SMALL HYPHEN-MINUS + case 0xFF0D: // FULLWIDTH HYPHEN-MINUS + *out = '-'; return 1; + case 0x00BF: // INVERTED QUESTION MARK + *out = '?'; return 1; + case 0x00D7: // MULTIPLICATION SIGN + *out = 'x'; return 1; + case 0x2018: // LEFT SINGLE QUOTATION MARK + case 0x2019: // RIGHT SINGLE QUOTATION MARK + case 0xFF07: // FULLWIDTH APOSTROPHE + *out = '\''; return 1; + case 0x201c: // LEFT DOUBLE QUOTATION MARK + case 0x201d: // RIGHT DOUBLE QUOTATION MARK + case 0xFF02: // FULLWIDTH QUOTATION MARK *out = '"'; return 1; default: return 0; @@ -486,6 +1354,10 @@ }; +/* + TODO I'm pretty sure you can't break Korean at any character. + And what about Japanese Katakana and Hiragana? +*/ static const _rmap can_break_words_data[]= { {"0"}, /* default value - can't break words at any character. */ @@ -787,7 +1669,8 @@ } { if (cjk_locale()) { - /* CJK guys should do something similar to 'else' branch */ + /* CJK guys should do something similar to 'else' branch */ + TexPrologue = " "; } else { char buf[500]; int len = 0; @@ -876,6 +1759,11 @@ return TexPrologue; }; +// Warning: +// This code forces us to use "GB2312", "BIG5", etc instead +// of "CP936", "CP950", etc even when our iconv supports +// the "CPxxx" form and the encodings differ. +// Be sure this is what you want if you call this function. 
const char* XAP_EncodingManager::charsetFromCodepage(int lid) const { static char buf[100]; Index: src/af/xap/xp/xap_EncodingManager.h =================================================================== RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.h,v retrieving revision 1.19 diff -u -r1.19 xap_EncodingManager.h --- src/af/xap/xp/xap_EncodingManager.h 2001/05/25 05:52:12 1.19 +++ src/af/xap/xp/xap_EncodingManager.h 2001/06/03 07:18:23 @@ -52,9 +52,14 @@ /* this shouldn't return NULL. Don't free or write to returned string. The string should be uppercased (extra font tarballs assume this). + TODO isn't iconv case sensitive? Mac encoding names are mixed case! */ virtual const char* getNativeEncodingName() const; + /* + This should return true for any Unicode locale: + UTF-8 on *nix, UCS-2 on Windows, etc + */ inline virtual bool isUnicodeLocale() const {return m_bIsUnicodeLocale;} /*
_________________________________________________________ Do You Yahoo!? Get your free @yahoo.com address at http://mail.yahoo.com
This archive was generated by hypermail 2b25 : Sun Jun 03 2001 - 02:31:05 CDT