Patch: Multi-encoding Text import/export (2)

Subject: Patch: Multi-encoding Text import/export (2)
From: Andrew Dunbar (hippietrail@yahoo.com)
Date: Sun May 20 2001 - 00:51:48 CDT

sorted by: [ date ] [ thread ] [ subject ] [ author ]
Next message: Sam TH: "commit -- Re: Patch: Multi-encoding Text import/export (2)"
Previous message: Sam TH: "Re: Patch: Multi-encoding Text import/export"
Next in thread: Sam TH: "commit -- Re: Patch: Multi-encoding Text import/export (2)"

Here is the resubmitted patch with the suggested changes and
the doxygen comments. Enjoy!

If it's deemed OK, the current UTF-8 importer and exporter
should be removed - that removal is not included in my patch.

Andrew Dunbar.

-- http://linguaphile.sourceforge.net

Index: src/af/util/xp/ut_mbtowc.cpp =================================================================== RCS file: /cvsroot/abi/src/af/util/xp/ut_mbtowc.cpp,v retrieving revision 1.17 diff -u -r1.17 ut_mbtowc.cpp --- src/af/util/xp/ut_mbtowc.cpp 2001/05/03 21:11:37 1.17 +++ src/af/util/xp/ut_mbtowc.cpp 2001/05/20 05:06:08 @@ -22,6 +22,8 @@ #include <limits.h> #include "ut_mbtowc.h" +// UTF-8 can use up to 6 bytes +#define MY_MB_LEN_MAX 6 #if 0 /* big if 0 */ #if defined(__OpenBSD__) || defined(__FreeBSD__) @@ -191,7 +193,7 @@ int UT_Mbtowc::mbtowc(wchar_t &wc,char mb) { - if(++m_bufLen>MB_LEN_MAX) + if(++m_bufLen>MY_MB_LEN_MAX) { initialize(); return 0; @@ -202,7 +204,7 @@ #else size_t thisLen=mbrtowc(&wc,m_buf,m_bufLen,&m_state); #endif - if(thisLen>MB_LEN_MAX)return 0; + if(thisLen>MY_MB_LEN_MAX)return 0; if(thisLen==0)thisLen=1; m_bufLen-=thisLen; return 1; @@ -229,6 +231,12 @@ cd = iconv_open("UCS-2", charset ); }; +UT_Mbtowc::UT_Mbtowc(const char* from_charset): m_bufLen(0) +{ + cd = iconv_open("UCS-2", from_charset); + UT_ASSERT(cd != (iconv_t)-1); +}; + UT_Mbtowc::UT_Mbtowc(): m_bufLen(0) { cd = iconv_open("UCS-2", XAP_EncodingManager::get_instance()->getNativeEncodingName() ); @@ -250,7 +258,7 @@ int UT_Mbtowc::mbtowc(wchar_t &wc,char mb) { - if(++m_bufLen>MB_LEN_MAX) { + if(++m_bufLen>MY_MB_LEN_MAX) { initialize(); return 0; } Index: src/af/util/xp/ut_mbtowc.h =================================================================== RCS file: /cvsroot/abi/src/af/util/xp/ut_mbtowc.h,v retrieving revision 1.9 diff -u -r1.9 ut_mbtowc.h --- src/af/util/xp/ut_mbtowc.h 2000/11/04 04:54:56 1.9 +++ src/af/util/xp/ut_mbtowc.h 2001/05/20 05:06:08 @@ -49,6 +49,7 @@ public: void initialize(); UT_Mbtowc(); + UT_Mbtowc(const char* from_charset); UT_Mbtowc(const UT_Mbtowc& v); ~UT_Mbtowc(); int mbtowc(wchar_t &wc,char mb); Index: src/wp/impexp/xp/ie_exp.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_exp.cpp,v 
retrieving revision 1.46 diff -u -r1.46 ie_exp.cpp --- src/wp/impexp/xp/ie_exp.cpp 2001/05/05 20:08:13 1.46 +++ src/wp/impexp/xp/ie_exp.cpp 2001/05/20 05:06:44 @@ -109,7 +109,8 @@ // TODO add code to make a backup of the original file, if it exists. #ifndef HAVE_GNOMEVFS - m_fp = fopen(szFilename,"w"); + // Open file in binary mode or UCS-2 output will be mangled. + m_fp = fopen(szFilename,"wb"); return (m_fp != 0); #else GnomeVFSResult result; Index: src/wp/impexp/xp/ie_exp_Text.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_exp_Text.cpp,v retrieving revision 1.23 diff -u -r1.23 ie_exp_Text.cpp --- src/wp/impexp/xp/ie_exp_Text.cpp 2001/05/05 20:08:13 1.23 +++ src/wp/impexp/xp/ie_exp_Text.cpp 2001/05/20 05:06:52 @@ -34,6 +34,8 @@ #include "ut_string_class.h" +#define MY_MB_LEN_MAX 6 + ////////////////////////////////////////////////////////////////// // a private listener class to help us translate the document // into a text stream. code is at the bottom of this file. @@ -70,12 +72,18 @@ protected: void _closeBlock(void); void _outputData(const UT_UCSChar * p, UT_uint32 length); + void _output8BitData(const UT_UCSChar * , UT_uint32 length); + void _output16BitData(const UT_UCSChar * , UT_uint32 length); PD_Document * m_pDocument; IE_Exp_Text * m_pie; bool m_bInBlock; bool m_bToClipboard; - UT_Wctomb m_wctomb; + bool m_bFirstWrite; + UT_Wctomb m_wctomb; + const char * m_szEncoding; + bool m_bBigEndian; + bool m_bUseBOM; }; /*****************************************************************/ @@ -88,6 +96,9 @@ m_pListener = NULL; } +/*! + Destruct text exporter + */ IE_Exp_Text::~IE_Exp_Text() { } @@ -95,6 +106,10 @@ /*****************************************************************/ /*****************************************************************/ +/*! 
+ Check filename extension for filetypes we support + \param szSuffix Filename extension + */ bool IE_Exp_Text_Sniffer::recognizeSuffix(const char * szSuffix) { return (!UT_stricmp(szSuffix,".txt") || !UT_stricmp(szSuffix, ".text")); @@ -112,7 +127,7 @@ const char ** pszSuffixList, IEFileType * ft) { - *pszDesc = "Text (.txt)"; + *pszDesc = "Text (.txt, .text)"; *pszSuffixList = "*.text; *.txt"; *ft = getFileType(); return true; @@ -144,34 +159,67 @@ if (!m_bInBlock) return; -#ifdef WIN32 // we need to generate CRLFs on Win32 - if (m_bToClipboard) // when writing to the clipboard. we - m_pie->write("\r"); // use text mode when going to a file -#endif // so we don't need to then. + // TODO All writes should be re-routed via iconv since UCS-2 + // TODO uses two bytes for each character. + // TODO Old Mac should use "\r". Mac OSX should Use U+2028 or U+2029. + // TODO We always add an extra line break at the end of a file but shouldn't +#ifdef WIN32 + m_pie->write("\r\n"); +#else m_pie->write("\n"); +#endif m_bInBlock = false; return; } void s_Text_Listener::_outputData(const UT_UCSChar * data, UT_uint32 length) { + if (m_szEncoding && !strncmp(m_szEncoding,"UCS-2",5)) + _output16BitData(data, length); + else + _output8BitData(data, length); +} + +/*! + Output 8-bit text buffer to file + \param data Buffer to output + \param length Size of buffer + + Single byte and multi byte encodings are supported. + Wide character encodings are not supported. + The buffer must not contain NULL bytes. + */ +void s_Text_Listener::_output8BitData(const UT_UCSChar * data, UT_uint32 length) +{ UT_String sBuf; const UT_UCSChar * pData; int mbLen; - char pC[MB_LEN_MAX]; - + char pC[MY_MB_LEN_MAX]; + UT_ASSERT(sizeof(UT_Byte) == sizeof(char)); + if (m_bFirstWrite) + { + if (m_szEncoding) + m_wctomb.setOutCharset(m_szEncoding); + if (m_bUseBOM) + { + // TODO There may be reason for using a BOM in UTF-8 text. + // TODO I've seen MS software do it. 
+ m_pie->write("\xef\xbb\xbf",3); + } + m_bFirstWrite = false; + } + for (pData=data; (pData<data+length); /**/) { if(!m_wctomb.wctomb(pC,mbLen,(wchar_t)*pData)) { - mbLen=1; - pC[0]='?'; - m_wctomb.initialize(); + mbLen=1; + pC[0]='?'; + m_wctomb.initialize(); } - pData++; if (mbLen>1) { sBuf += pC; @@ -179,17 +227,96 @@ else { // We let any UCS_LF's (forced line breaks) go out as is. + // TODO Old Mac should use "\r". Mac OSX should Use U+2028 or U+2029. #ifdef WIN32 - if (m_bToClipboard && pC[0]==UCS_LF) + if (pC[0]==UCS_LF) sBuf += "\r"; #endif sBuf += (char)pC[0]; } + pData++; } m_pie->write(sBuf.c_str(),sBuf.size()); } +/*! + Output 16-bit text buffer to file + \param data Buffer to output + \param length Size of buffer + + Supports the UCS-2 encodings. UCS-2 streams include NULL bytes. + */ +void s_Text_Listener::_output16BitData(const UT_UCSChar * data, UT_uint32 length) +{ + const UT_UCSChar * pInData; + char * pOutData; + + int mbLen; + unsigned char pC[MY_MB_LEN_MAX]; + char * pConvertedData = 0; + + UT_ASSERT(sizeof(UT_Byte) == sizeof(char)); + + pConvertedData = new char[length * sizeof(UT_UCSChar)]; + pOutData = pConvertedData; + + UT_ASSERT(pConvertedData); + + if (m_bFirstWrite) + { + if (m_szEncoding) + m_wctomb.setOutCharset(m_szEncoding); + if (m_bUseBOM) + { + if (m_bBigEndian) + m_pie->write("\xfe\xff",2); + else + m_pie->write("\xff\xfe",2); + } + m_bFirstWrite = false; + } + + for (pInData=data; (pInData<data+length); /**/) + { + if(!m_wctomb.wctomb(reinterpret_cast<char *>(pC),mbLen,(wchar_t)*pInData)) + { + // TODO U+FFFD "REPLACEMENT CHARACTER" is the + // TODO correct unicode equivalent of '?' isn't it? + mbLen=2; + if (m_bBigEndian) + { + pC[0]=0xff; + pC[1]=0xfd; + } + else + { + pC[0]=0xfd; + pC[1]=0xff; + } + m_wctomb.initialize(); + } + // We let any UCS_LF's (forced line breaks) go out as is. + if (*pInData == UCS_LF) + { + // TODO Old Mac should use "\r". Mac OSX should Use U+2028 or U+2029. 
+#ifdef WIN32 + // TODO Win needs to *insert* an extra CR character before the LF. + // TODO The old 8-bit code used UT_String which could grow dynamically + // TODO but the 16-bit code uses a fixed size buffer. + // TODO What is an appropriate solution? +#endif + } + *pOutData++ = pC[0]; + *pOutData++ = pC[1]; + ++pInData; + } + + m_pie->write(pConvertedData,length * sizeof(UT_UCSChar)); + + delete [] pConvertedData; +} + s_Text_Listener::s_Text_Listener(PD_Document * pDocument, IE_Exp_Text * pie, bool bToClipboard) @@ -201,8 +328,15 @@ // assume that we are starting in the middle of a block. // when going to a file we should not. m_bInBlock = m_bToClipboard; + m_bFirstWrite = true; + m_szEncoding = 0; + m_bBigEndian = true; + m_bUseBOM = false; } +/*! + Destruct listener + */ s_Text_Listener::~s_Text_Listener() { _closeBlock(); Index: src/wp/impexp/xp/ie_imp.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp.cpp,v retrieving revision 1.40 diff -u -r1.40 ie_imp.cpp --- src/wp/impexp/xp/ie_imp.cpp 2001/05/07 16:50:43 1.40 +++ src/wp/impexp/xp/ie_imp.cpp 2001/05/20 05:06:53 @@ -176,7 +176,8 @@ char szBuf[4096]; // 4096 ought to be enough int iNumbytes; FILE *f; - if ( ( f = fopen( szFilename, "r" ) ) != (FILE *)0 ) + // we must open in binary mode for UCS-2 compatibility + if ( ( f = fopen( szFilename, "rb" ) ) != (FILE *)0 ) { iNumbytes = fread(szBuf, 1, sizeof(szBuf), f); fclose(f); Index: src/wp/impexp/xp/ie_imp_Text.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_Text.cpp,v retrieving revision 1.24 diff -u -r1.24 ie_imp_Text.cpp --- src/wp/impexp/xp/ie_imp_Text.cpp 2001/05/03 00:45:36 1.24 +++ src/wp/impexp/xp/ie_imp_Text.cpp 2001/05/20 05:07:09 @@ -32,18 +32,185 @@ /*****************************************************************/ /*****************************************************************/ +/*! 
+ Check buffer for identifiable encoded characters + \param szBuf Buffer to check + \param iNumbytes Size of buffer + */ bool IE_Imp_Text_Sniffer::recognizeContents(const char * szBuf, UT_uint32 iNumbytes) { - // We give the other guys a chance, since this - // importer is so generic. - return false; + // TODO It may or may not be worthwhile trying to recognize CJK encodings. + + bool bSuccess = false; + + bSuccess = _recognizeUTF8(szBuf, iNumbytes); + + if (bSuccess == false) + { + if (_recognizeUCS2(szBuf, iNumbytes, false) != UE_NotUCS) + { + bSuccess = true; + } + } + + return bSuccess; } +/*! + Check buffer for UTF-8 encoded characters + \param szBuf Buffer to check + \param iNumbytes Size of buffer + */ +bool IE_Imp_Text_Sniffer::_recognizeUTF8(const char * szBuf, + UT_uint32 iNumbytes) +{ + bool bSuccess = false; + const unsigned char *p = reinterpret_cast<const unsigned char *>(szBuf); + + while (p < reinterpret_cast<const unsigned char *>(szBuf + iNumbytes)) + { + UT_sint32 iLen; + + if ((*p & 0x80) == 0) // ASCII + { + ++p; + continue; + } + else if ((*p & 0xc0) == 0x80) // not UTF-8 + { + return false; + } + else if (*p == 0xfe || *p == 0xff) + { + // BOM shouldn't occur in UTF-8 - file may be UCS-2 + return false; + } + else if ((*p & 0xfe) == 0xfc) // lead byte in 6-byte sequence + iLen = 6; + else if ((*p & 0xfc) == 0xf8) // lead byte in 5-byte sequence + iLen = 5; + else if ((*p & 0xf8) == 0xf0) // lead byte in 4-byte sequence + iLen = 4; + else if ((*p & 0xf0) == 0xe0) // lead byte in 3-byte sequence + iLen = 3; + else if ((*p & 0xe0) == 0xc0) // lead byte in 2-byte sequence + iLen = 2; + else + { + // the above code covers all cases - if we reach here the logic is wrong + UT_ASSERT(UT_SHOULD_NOT_HAPPEN); + return false; + } + + while (--iLen) + { + ++p; + if (p >= reinterpret_cast<const unsigned char *>(szBuf + iNumbytes)) + { + //UT_DEBUGMSG((" out of data!\n")); + break; + } + if ((*p & 0xc0) != 0x80) + return false; + } + // all bytes in 
sequence were ok + bSuccess = true; + ++p; + } + + return bSuccess; +} + +/*! + Check buffer for UCS-2 encoded characters + \param szBuf Buffer to check + \param iNumbytes Size of buffer + \param bDeep Set to true for extra, non-authoritative tests + */ +IE_Imp_Text_Sniffer::UCS2_Endian IE_Imp_Text_Sniffer::_recognizeUCS2(const char * szBuf, + UT_uint32 iNumbytes, + bool bDeep) +{ + UCS2_Endian eResult = UE_NotUCS; + + if (iNumbytes >= 2) + { + const unsigned char *p = reinterpret_cast<const unsigned char *>(szBuf); + + // Big endian ? + if (p[0] == 0xfe && p[1] == 0xff) + eResult = UE_BigEnd; + + // Little endian + else if (p[0] == 0xff && p[1] == 0xfe) + eResult = UE_LittleEnd; + + if (eResult == UE_NotUCS && bDeep) + { + // If we know this is a text file, know it isn't UTF-8, and it doesn't + // begin with a BOM, let's try a couple of heuristics too see if it + // might be a UCS-2 file without a BOM. + // Since CR and LF are very common and their endian-swapped counterparts + // are reserved in Unicode, they should only exist in big endian or + // little endian but not both. + // If there are no CRs or LFs we fall back on counting how many characters + // fall within the ASCII range for both endians. The one with the higher + // count wins. + // Text files which contain NUL characters will be wrongly identified as + // UCS-2 using this technique. + + UT_sint32 iLineEndBE = 0; + UT_sint32 iLineEndLE = 0; + UT_sint32 iAsciiBE = 0; + UT_sint32 iAsciiLE = 0; + + // Count all CR, LF, and ASCII range characters. + for (p = reinterpret_cast<const unsigned char *>(szBuf); + p < reinterpret_cast<const unsigned char *>(szBuf + iNumbytes - 1); + p += 2) + { + // A 16-bit null character probably won't exist in a UCS-2 file + if (p[0] == 0 && p[1] == 0) + break; + if (p[0] == 0) + { + ++iAsciiBE; + if (p[1] == 0x0A || p[1] == 0x0D) + ++iLineEndBE; + } + if (p[1] == 0) + { + ++iAsciiLE; + if (p[0] == 0x0A || p[0] == 0x0D) + ++iLineEndLE; + } + } + + // Take an educated guess. 
+ if (iLineEndBE && !iLineEndLE) + eResult = UE_BigEnd; + else if (iLineEndLE && !iLineEndBE) + eResult = UE_LittleEnd; + else if (!iLineEndBE && !iLineEndLE) + { + if (iAsciiBE > iAsciiLE) + eResult = UE_BigEnd; + else if (iAsciiLE > iAsciiBE) + eResult = UE_LittleEnd; + } + } + } + + return eResult; +} + +/*! + Check filename extension for filetypes we support + \param szSuffix Filename extension + */ bool IE_Imp_Text_Sniffer::recognizeSuffix(const char * szSuffix) { - // We give the other guys a chance, since this - // importer is so generic. return (!UT_stricmp (szSuffix, ".txt") || !UT_stricmp(szSuffix, ".text")); } @@ -59,8 +226,8 @@ const char ** pszSuffixList, IEFileType * ft) { - *pszDesc = "Text (.txt)"; - *pszSuffixList = "*.txt"; + *pszDesc = "Text (.txt, .text)"; + *pszSuffixList = "*.txt; *.text"; *ft = getFileType(); return true; } @@ -69,10 +236,9 @@ /*****************************************************************/ /* - Import US-ASCII (actually Latin-1) data from a plain - text file. We allow either LF or CR or CRLF line - termination. Each line terminator is taken to be a - paragraph break. + Import data from a plain text file. We allow either + LF or CR or CRLF line termination. Each line + terminator is taken to be a paragraph break. */ /*****************************************************************/ @@ -82,7 +248,8 @@ UT_Error IE_Imp_Text::importFile(const char * szFilename) { - FILE *fp = fopen(szFilename, "r"); + // We must open in binary mode for UCS-2 compatibility. + FILE *fp = fopen(szFilename, "rb"); if (!fp) { UT_DEBUGMSG(("Could not open file %s\n",szFilename)); @@ -91,6 +258,9 @@ UT_Error error; + // First we need to determine the encoding. + // TODO We might want to find a way to combine this with recognizeContents(). 
+ X_CleanupIfError(error,_recognizeEncoding(fp)); X_CleanupIfError(error,_writeHeader(fp)); X_CleanupIfError(error,_parseFile(fp)); @@ -106,6 +276,9 @@ /*****************************************************************/ /*****************************************************************/ +/*! + Destruct text importer + */ IE_Imp_Text::~IE_Imp_Text() { } @@ -113,6 +286,7 @@ IE_Imp_Text::IE_Imp_Text(PD_Document * pDocument) : IE_Imp(pDocument) { + m_szEncoding = 0; } /*****************************************************************/ @@ -121,6 +295,44 @@ #define X_ReturnIfFail(exp,error) do { bool b = (exp); if (!b) return (error); } while (0) #define X_ReturnNoMemIfError(exp) X_ReturnIfFail(exp,UT_IE_NOMEMORY) +/*! + Detect encoding of text file + \param fp File + + Supports UTF-8 and UCS-2 big and little endian + CJK encodings could be added + */ +UT_Error IE_Imp_Text::_recognizeEncoding(FILE * fp) +{ + char szBuf[4096]; // 4096 ought to be enough + UT_sint32 iNumbytes; + + iNumbytes = fread(szBuf, 1, sizeof(szBuf), fp); + fseek(fp, 0, SEEK_SET); + + if (IE_Imp_Text_Sniffer::_recognizeUTF8(szBuf, iNumbytes)) + { + m_szEncoding = "UTF-8"; + } + else + { + IE_Imp_Text_Sniffer::UCS2_Endian eUcs2 = IE_Imp_Text_Sniffer::UE_NotUCS; + + eUcs2 = IE_Imp_Text_Sniffer::_recognizeUCS2(szBuf, iNumbytes, true); + + if (eUcs2 == IE_Imp_Text_Sniffer::UE_BigEnd) + { + m_szEncoding = "UCS-2-BE"; + } + else if (eUcs2 == IE_Imp_Text_Sniffer::UE_LittleEnd) + { + m_szEncoding = "UCS-2-LE"; + } + } + + return UT_OK; +} + UT_Error IE_Imp_Text::_writeHeader(FILE * /* fp */) { X_ReturnNoMemIfError(m_pDocument->appendStrux(PTX_Section, NULL)); @@ -137,6 +349,9 @@ UT_UCSChar c; wchar_t wc; + if (m_szEncoding) + m_Mbtowc.setInCharset(m_szEncoding); + while (fread(&b, 1, sizeof(b), fp) > 0) { if(!m_Mbtowc.mbtowc(wc,b)) @@ -146,6 +361,8 @@ { case (UT_UCSChar)'\r': case (UT_UCSChar)'\n': + case 0x2028: // Unicode line separator + case 0x2029: // Unicode paragraph separator if ((c == 
(UT_UCSChar)'\n') && bEatLF) { @@ -158,7 +375,9 @@ bEatLF = true; } - // we interprete either CRLF, CR, or LF as a paragraph break. + // we interpret either CRLF, CR, or LF as a paragraph break. + // we also accept U+2028 (line separator) and U+2029 (para separator) + // especially since these are recommended by Mac OS X. // start a paragraph and emit any text that we // have accumulated. @@ -224,6 +443,8 @@ { case (UT_UCSChar)'\r': case (UT_UCSChar)'\n': + case 0x2028: // Unicode line separator + case 0x2029: // Unicode paragraph separator if ((c == (UT_UCSChar)'\n') && bEatLF) { bEatLF = false; @@ -235,7 +456,9 @@ bEatLF = true; } - // we interprete either CRLF, CR, or LF as a paragraph break. + // we interpret either CRLF, CR, or LF as a paragraph break. + // we also accept U+2028 (line separator) and U+2029 (para separator) + // especially since these are recommended by Mac OS X. if (gbBlock.getLength() > 0) { Index: src/wp/impexp/xp/ie_imp_Text.h =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_Text.h,v retrieving revision 1.13 diff -u -r1.13 ie_imp_Text.h --- src/wp/impexp/xp/ie_imp_Text.h 2001/05/03 00:45:36 1.13 +++ src/wp/impexp/xp/ie_imp_Text.h 2001/05/20 05:07:10 @@ -31,6 +31,7 @@ class IE_Imp_Text_Sniffer : public IE_ImpSniffer { friend class IE_Imp; + friend class IE_Imp_Text; public: IE_Imp_Text_Sniffer() {} @@ -45,6 +46,14 @@ virtual UT_Error constructImporter (PD_Document * pDocument, IE_Imp ** ppie); +protected: + enum UCS2_Endian { UE_BigEnd = -1, UE_NotUCS = 0, UE_LittleEnd }; + + static bool _recognizeUTF8 (const char * szBuf, + UT_uint32 iNumbytes); + static UCS2_Endian _recognizeUCS2 (const char * szBuf, + UT_uint32 iNumbytes, + bool bDeep); }; class IE_Imp_Text : public IE_Imp @@ -58,9 +67,11 @@ unsigned char * pData, UT_uint32 lenData); protected: + UT_Error _recognizeEncoding(FILE * fp); UT_Error _parseFile(FILE * fp); UT_Error _writeHeader(FILE * fp); UT_Mbtowc m_Mbtowc; + 
const char * m_szEncoding; }; #endif /* IE_IMP_TEXT_H */

_________________________________________________________ Do You Yahoo!? Get your free @yahoo.com address at http://mail.yahoo.com

Next message: Sam TH: "commit -- Re: Patch: Multi-encoding Text import/export (2)"
Previous message: Sam TH: "Re: Patch: Multi-encoding Text import/export"
Next in thread: Sam TH: "commit -- Re: Patch: Multi-encoding Text import/export (2)"

This archive was generated by hypermail 2b25 : Sat May 26 2001 - 03:51:05 CDT