patch: export/import ignored words list with document


Subject: patch: export/import ignored words list with document
From: WJCarpenter (bill-abisource@carpenter.ORG)
Date: Tue Sep 05 2000 - 17:03:27 CDT


This patch (against 090100 sources) implements a new feature of saving
a document's spellcheck ignored word list with the document when saved
in the *.abw format. The corresponding read back is also implemented.
After this patch, the "ignore all" word list acts like a persistent,
per-document custom dictionary.

I have added two preference variables, SpellCheckIgnoredWordsSave and
SpellCheckIgnoredWordsLoad, but they don't actually control the
feature at this point (both halves are always on). Hooking them up
is delayed because preference values aren't available in the
import/export code, and it will take me a while to root through the
class hierarchy to find a simple way to get at them. (That is one of
the downsides of a certain style of OO programming that is otherwise
mostly A Good Thing.)

Ignored words are saved in a bit of XML that looks like this:

        <ignoredwords>
          <iw>sumthing</iw>
          <iw>utherthing</iw>
        </ignoredwords>

The <ignoredwords> tag is at the same level as <section> and <styles>.

All changes are in XP (cross-platform) code, tested on Linux. I am not
sure whether this works with the alternative Gnome XML parser, but my
guess is that it does.

-- 
bill@carpenter.ORG (WJCarpenter)    PGP 0x91865119
38 95 1B 69 C9 C6 3D 25    73 46 32 04 69 D6 ED F3

Buy my house in Woodinville (near Seattle): <http://www.johnlscott.com/57554>

diff -ru abi-090100-ORIG/src/wp/ap/xp/ap_Prefs_SchemeIds.h abi-090100/src/wp/ap/xp/ap_Prefs_SchemeIds.h --- abi-090100-ORIG/src/wp/ap/xp/ap_Prefs_SchemeIds.h Tue Jul 25 14:10:51 2000 +++ abi-090100/src/wp/ap/xp/ap_Prefs_SchemeIds.h Tue Sep 5 15:58:39 2000 @@ -55,6 +55,12 @@ #define AP_PREF_KEY_SpellCheckInternet "SpellCheckInternet" /* enable spell checking internet names {0,1} */ #define AP_PREF_DEFAULT_SpellCheckInternet "1" +#define AP_PREF_KEY_SpellCheckIgnoredWordsSave "SpellCheckIgnoredWordsSave" /* save ignored words list with doc */ +#define AP_PREF_DEFAULT_SpellCheckIgnoredWordsSave "1" + +#define AP_PREF_KEY_SpellCheckIgnoredWordsLoad "SpellCheckIgnoredWordsLoad" /* load ignored words list with doc */ +#define AP_PREF_DEFAULT_SpellCheckIgnoredWordsLoad "1" + #define AP_PREF_KEY_OptionsTabNumber "OptionsTabNumber" /* the page number of the currently shown page in the */ #define AP_PREF_DEFAULT_OptionsTabNumber "0" /* options dialog */ diff -ru abi-090100-ORIG/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp abi-090100/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp --- abi-090100-ORIG/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp Wed Jul 26 21:15:31 2000 +++ abi-090100/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp Tue Sep 5 21:30:13 2000 @@ -116,6 +116,7 @@ UT_Bool bNewLineAfter, PT_AttrPropIndex api); void _outputData(const UT_UCSChar * p, UT_uint32 length); void _handleStyles(void); + void _handleIgnoredWords(void); void _handleDataItems(void); PD_Document * m_pDocument; @@ -455,6 +456,7 @@ _handleStyles(); + _handleIgnoredWords(); } s_AbiWord_1_Listener::~s_AbiWord_1_Listener() @@ -620,6 +622,54 @@ if (bWroteOpenStyleSection) m_pie->write("</styles>\n"); + + return; +} + +void s_AbiWord_1_Listener::_handleIgnoredWords(void) +{ + UT_Bool saveIgnores; + //pPrefs->getPrefsValueBool((XML_Char *)XAP_PREF_KEY_SpellCheckIgnoredWordsSave, &saveIgnores); + if (!saveIgnores) return; // don't bother + UT_Bool bWroteOpenIgnoredWordsSection = UT_FALSE; + + const UT_UCSChar *word; + for (UT_uint32 i = 
0; m_pDocument->enumIgnores(i, &word); i++) + { + if (!bWroteOpenIgnoredWordsSection) + { + m_pie->write("<ignoredwords>\n"); + bWroteOpenIgnoredWordsSection = UT_TRUE; + } + m_pie->write("<iw>"); + for (UT_uint32 udex=0; word[udex]; ++udex) + { + UT_UCSChar ch = word[udex]; + switch (ch) + { + case '&': m_pie->write("&amp;"); break; + case '<': m_pie->write("&lt;"); break; + case '>': m_pie->write("&gt;"); break; + case '"': m_pie->write("&quot;"); break; + default: + char utb[100]; + if (ch < ' ' || ch >= 128) + { + sprintf(utb, "&#x%x;", ch); + } + else + { + utb[0] = (char)ch; + utb[1] = 0; + } + m_pie->write(utb); + } + } + m_pie->write("</iw>\n"); + } + + if (bWroteOpenIgnoredWordsSection) + m_pie->write("</ignoredwords>\n"); return; } diff -ru abi-090100-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp abi-090100/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp --- abi-090100-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp Wed Jul 26 21:15:31 2000 +++ abi-090100/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp Tue Sep 5 14:37:19 2000 @@ -259,6 +259,8 @@ #define TT_PAGEBREAK 11 // a forced page-break <pbr> #define TT_STYLESECTION 12 // a style section <styles> #define TT_STYLE 13 // a style <s> within a style section +#define TT_IGNOREDWORDS 14 // an ignored words section <ignoredwords> +#define TT_IGNOREDWORD 15 // a word <iw> within an ignored words section struct _TokenTable { @@ -297,6 +299,8 @@ { "pbr", TT_PAGEBREAK }, { "styles", TT_STYLESECTION }, { "s", TT_STYLE }, + { "ignoredwords", TT_IGNOREDWORDS }, + { "iw", TT_IGNOREDWORD }, { "*", TT_OTHER }}; // must be last #define TokenTableSize ((sizeof(s_Tokens)/sizeof(s_Tokens[0]))) @@ -456,6 +460,16 @@ X_CheckError(m_pDocument->appendStyle(atts)); return; + case TT_IGNOREDWORDS: + X_VerifyParseState(_PS_Doc); + m_parseState = _PS_IgnoredWordsSec; + return; + + case TT_IGNOREDWORD: + X_VerifyParseState(_PS_IgnoredWordsSec); + m_parseState = _PS_IgnoredWordsItem; + return; + case TT_OTHER: default: UT_DEBUGMSG(("Unknown tag 
[%s]\n",name)); @@ -564,6 +578,16 @@ m_parseState = _PS_StyleSec; return; + case TT_IGNOREDWORDS: + X_VerifyParseState(_PS_IgnoredWordsSec); + m_parseState = _PS_Doc; + return; + + case TT_IGNOREDWORD: + X_VerifyParseState(_PS_IgnoredWordsItem); + m_parseState = _PS_IgnoredWordsSec; + return; + case TT_OTHER: default: UT_DEBUGMSG(("Unknown end tag [%s]\n",name)); @@ -591,6 +615,7 @@ } case _PS_Block: + case _PS_IgnoredWordsItem: { UT_ASSERT(sizeof(XML_Char) == sizeof(UT_Byte)); UT_ASSERT(sizeof(XML_Char) != sizeof(UT_UCSChar)); @@ -600,19 +625,17 @@ // [] convert CRLF to SP. // [] convert CR to SP. // [] convert LF to SP. + // ignored words processing doesn't care about the + // white-space stuff, but it does no harm UT_Byte * ss = (UT_Byte *)s; - UT_UCSChar buf[1024]; + UT_UCSChar _buf[1024], *buf = _buf; + // len is an upper bound on the length of the decoded stuff + if (len > 1000) buf = new UT_UCSChar[len+1]; int bufLen = 0; for (int k=0; k<len; k++) { - if (bufLen == NrElements(buf)) // pump it out in chunks - { - X_CheckError(m_pDocument->appendSpan(buf,bufLen)); - bufLen = 0; - } - if ((ss[k] < 0x80) && (m_lenCharDataSeen > 0)) { // is it us-ascii and we are in a UTF-8 @@ -674,10 +697,24 @@ } } - // flush out the last piece of a buffer + // flush out the buffer if (bufLen > 0) - X_CheckError(m_pDocument->appendSpan(buf,bufLen)); + { + switch (m_parseState) + { + case _PS_Block: + X_CheckError(m_pDocument->appendSpan(buf,bufLen)); + break; + case _PS_IgnoredWordsItem: + X_CheckError(m_pDocument->appendIgnore(buf,bufLen)); + break; + default: + UT_ASSERT(UT_SHOULD_NOT_HAPPEN); + break; + } + } + if (buf != _buf) delete buf; return; } diff -ru abi-090100-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.h abi-090100/src/wp/impexp/xp/ie_imp_AbiWord_1.h --- abi-090100-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.h Wed Jul 26 21:15:31 2000 +++ abi-090100/src/wp/impexp/xp/ie_imp_AbiWord_1.h Tue Sep 5 14:15:24 2000 @@ -82,7 +82,9 @@ _PS_DataSec, _PS_DataItem, _PS_StyleSec, - 
_PS_Style + _PS_Style, + _PS_IgnoredWordsSec, + _PS_IgnoredWordsItem } ParseState; UT_Error m_error;



This archive was generated by hypermail 2b25 : Thu Sep 07 2000 - 13:25:13 CDT