Patch: Fix for Bug 1164


Subject: Patch: Fix for Bug 1164
From: Andrew Dunbar (hippietrail@yahoo.com)
Date: Mon May 21 2001 - 03:21:03 CDT


This patch enables the RTF importer to make use of the \fcharset and
\fcpg properties of the font table and switch between encodings
when it encounters \f.

There are a few edge cases and charsets I couldn't find information
on, so please contact me or implement them yourself if you know about them.

Andrew Dunbar.

-- 
http://linguaphile.sourceforge.net

Index: src/wp/impexp/xp/ie_imp_RTF.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_RTF.cpp,v retrieving revision 1.61 diff -u -r1.61 ie_imp_RTF.cpp --- src/wp/impexp/xp/ie_imp_RTF.cpp 2001/05/08 04:19:38 1.61 +++ src/wp/impexp/xp/ie_imp_RTF.cpp 2001/05/21 08:16:57 @@ -149,10 +149,89 @@ m_family = fontFamily; m_charSet = charSet; m_codepage = codepage; + m_szCodepage = "MS-ANSI"; m_pitch = pitch; memcpy(m_panose, panose, 10*sizeof(unsigned char)); m_pFontName = pFontName; m_pAlternativeFontName = pAlternativeFontName; + + // Set charset/codepage converter + if (m_codepage && m_charSet) + { + UT_DEBUGMSG(("RTF Font has codepage *and* charset\n")); + UT_ASSERT(UT_NOT_IMPLEMENTED); + } + else if (m_codepage) + { + m_szCodepage = XAP_EncodingManager::get_instance()->charsetFromCodepage(m_codepage); + } + else if (m_charSet) + { + switch (m_charSet) + { + case 0: // ANSI_CHARSET + m_szCodepage = "MS-ANSI"; // CP1252 + break; + case 2: // SYMBOL_CHARSET + UT_DEBUGMSG(("RTF Font charset 'Symbol' not implemented\n")); + UT_ASSERT(UT_NOT_IMPLEMENTED); + break; + case 128: // SHIFTJIS_CHARSET + m_szCodepage = "SHIFT-JIS"; + break; + case 161: // GREEK_CHARSET + m_szCodepage = "MS-GREEK"; // CP1253 + break; + case 162: // TURKISH_CHARSET + m_szCodepage = "MS-TURK"; // CP1254 + break; + // TODO What is different? Iconv only supports one MS Hebrew codepage. + case 181: // HEBREWUSER_CHARSET + UT_DEBUGMSG(("RTF Font charset 'HEBREWUSER'??\n")); + case 177: // HEBREW_CHARSET + m_szCodepage = "MS-HEBR"; // CP1255 + break; + // TODO What is different? Iconv only supports one MS Arabic codepage. 
+ case 178: // ARABICSIMPLIFIED_CHARSET + UT_DEBUGMSG(("RTF Font charset 'ARABICSIMPLIFIED'??\n")); + m_szCodepage = "MS-ARAB"; // CP1256 + break; + case 179: // ARABICTRADITIONAL_CHARSET + UT_DEBUGMSG(("RTF Font charset 'ARABICTRADITIONAL'??\n")); + m_szCodepage = "MS-ARAB"; // CP1256 + break; + case 180: // ARABICUSER_CHARSET + UT_DEBUGMSG(("RTF Font charset 'ARABICUSER'??\n")); + m_szCodepage = "MS-ARAB"; // CP1256 + break; + case 204: // CYRILLIC_CHARSET + m_szCodepage = "MS-CYRL"; // CP1251 + break; + case 238: // EASTERNEUROPE_CHARSET + m_szCodepage = "MS-EE"; // CP1250 + break; + case 254: // PC437_CHARSET + // TODO What is this and can iconv do it? + UT_DEBUGMSG(("RTF Font charset 'PC437'??\n")); + UT_ASSERT(UT_NOT_IMPLEMENTED); + break; + case 255: // OEM_CHARSET + // TODO Can iconv do this? + UT_DEBUGMSG(("RTF Font charset 'OEM'??\n")); + UT_ASSERT(UT_NOT_IMPLEMENTED); + break; + default: + UT_DEBUGMSG(("RTF Font charset unknown: %d\n", m_charSet)); + // TODO Unknown charset + UT_ASSERT(UT_NOT_IMPLEMENTED); + } + } + else + { + // TODO No codepage or charset - what do we do? + UT_DEBUGMSG(("RTF Font has neither codepage *nor* charset\n")); + // UT_ASSERT(UT_NOT_IMPLEMENTED); + } } @@ -3499,6 +3578,10 @@ bool IE_Imp_RTF::HandleFace(UT_uint32 fontNumber) { + RTFFontTableItem* pFont = GetNthTableFont(fontNumber); + if (pFont != NULL) + m_mbtowc.setInCharset(pFont->m_szCodepage); + return HandleU32CharacterProp(fontNumber, &m_currentRTFState.m_charProps.m_fontNumber); } Index: src/wp/impexp/xp/ie_imp_RTF.h =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_RTF.h,v retrieving revision 1.32 diff -u -r1.32 ie_imp_RTF.h --- src/wp/impexp/xp/ie_imp_RTF.h 2001/05/09 12:34:18 1.32 +++ src/wp/impexp/xp/ie_imp_RTF.h 2001/05/21 08:16:59 @@ -51,6 +51,7 @@ FontFamilyEnum m_family; int m_charSet; int m_codepage; + const char* m_szCodepage; FontPitch m_pitch; unsigned char m_panose[10]; char* m_pFontName;

_________________________________________________________ Do You Yahoo!? Get your free @yahoo.com address at http://mail.yahoo.com



This archive was generated by hypermail 2b25 : Sat May 26 2001 - 03:51:05 CDT