patch: ignored word list saving and loading


Subject: patch: ignored word list saving and loading
From: WJCarpenter (bill-abisource@carpenter.ORG)
Date: Sun Sep 10 2000 - 23:18:12 CDT


This patch (against 090800 sources) implements saving a document's
spellcheck ignored word list with the document when saved in the *.abw
format. The corresponding read back is also implemented. After this
patch, the "ignore all" word list acts like a persistent, per-document
custom dictionary. This patch includes the changes of my previous but
not-yet-committed patch on the same subject of a few days ago. In
other words, if someone decides to commit the attached patch, my
earlier patch can be discarded. If someone would rather see it a
different way, I could cook that up.

I have added two preference variables, SpellCheckIgnoredWordsSave and
SpellCheckIgnoredWordsLoad, and this time around they actually control
the feature (sorry, no GUI yet). As a side effect of the
implementation to use those preferences, PD_Document and AP_Convert
objects now have an XAP_App pointer (and a getApp()). This means that
lots of new places (including the import/export code) can now access
the user preferences. Go hog wild.

Ignored words are saved in a bit of XML that looks like this:

        <ignoredwords>
          <iw>sumthing</iw>
          <iw>utherthing</iw>
        </ignoredwords>

The <ignoredwords> tag is at the same level as <section> and <style>.

Most changes are XP, tested on Linux. I'm still not sure if this
works with the alternative Gnome XML parser, but my guess is that it
does. A small part of the changes is platform-specific, but things
should work if the code meets my modest assumptions about class
hierarchies and member functions. The Unix stuff works for sure.

-- 
bill@carpenter.ORG (WJCarpenter)    PGP 0x91865119
38 95 1B 69 C9 C6 3D 25    73 46 32 04 69 D6 ED F3

Buy my house in Woodinville (near Seattle): <http://www.johnlscott.com/57554>

diff -ru abi-090800-ORIG/src/text/ptbl/xp/pd_Document.cpp abi-090800/src/text/ptbl/xp/pd_Document.cpp --- abi-090800-ORIG/src/text/ptbl/xp/pd_Document.cpp Thu Jul 6 01:55:57 2000 +++ abi-090800/src/text/ptbl/xp/pd_Document.cpp Sat Sep 9 22:28:55 2000 @@ -50,9 +50,10 @@ ////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////// -PD_Document::PD_Document() +PD_Document::PD_Document(XAP_App *pApp) : AD_Document(), m_hashDataItems(11) { + m_pApp = pApp; m_pPieceTable = NULL; // perhaps this should be a magic "unknown" or "NULL" value, diff -ru abi-090800-ORIG/src/text/ptbl/xp/pd_Document.h abi-090800/src/text/ptbl/xp/pd_Document.h --- abi-090800-ORIG/src/text/ptbl/xp/pd_Document.h Wed Jul 26 21:15:26 2000 +++ abi-090800/src/text/ptbl/xp/pd_Document.h Sat Sep 9 22:29:43 2000 @@ -44,6 +44,7 @@ class pf_Frag_Strux; class PX_ChangeRecord; class PD_Style; +class XAP_App; #ifdef PT_TEST #include "ut_test.h" @@ -61,7 +62,7 @@ class PD_Document : public AD_Document { public: - PD_Document(); + PD_Document(XAP_App *pApp); virtual UT_Error readFromFile(const char * szFilename, int ieft); virtual UT_Error newDocument(void); @@ -168,6 +169,7 @@ const char * getFileName() { return m_szFilename; } UT_uint32 getLastType() { return m_lastSavedAsType; } + XAP_App * getApp() { return m_pApp; } #ifdef PT_TEST void __dump(FILE * fp) const; @@ -185,6 +187,7 @@ UT_AlphaHashTable m_hashDataItems; IEFileType m_lastSavedAsType; + XAP_App * m_pApp; }; diff -ru abi-090800-ORIG/src/wp/ap/beos/ap_BeOSApp.cpp abi-090800/src/wp/ap/beos/ap_BeOSApp.cpp --- abi-090800-ORIG/src/wp/ap/beos/ap_BeOSApp.cpp Wed Aug 16 13:35:41 2000 +++ abi-090800/src/wp/ap/beos/ap_BeOSApp.cpp Sun Sep 10 10:47:54 2000 @@ -666,7 +666,7 @@ #if CONVERT if (to) { - AP_Convert * conv = new AP_Convert(); + AP_Convert * conv = new AP_Convert(getApp()); conv->setVerbose(verbose); conv->convertTo(m_pArgs->m_argv[k], to); delete conv; diff -ru 
abi-090800-ORIG/src/wp/ap/beos/ap_BeOSFrame.cpp abi-090800/src/wp/ap/beos/ap_BeOSFrame.cpp --- abi-090800-ORIG/src/wp/ap/beos/ap_BeOSFrame.cpp Sat Aug 19 16:35:39 2000 +++ abi-090800/src/wp/ap/beos/ap_BeOSFrame.cpp Sun Sep 10 10:27:56 2000 @@ -416,7 +416,7 @@ // load a document into the current frame. // if no filename, create a new document. - AD_Document * pNewDoc = new PD_Document(); + AD_Document * pNewDoc = new PD_Document(getApp()); UT_ASSERT(pNewDoc); if (!szFilename || !*szFilename) diff -ru abi-090800-ORIG/src/wp/ap/qnx/ap_QNXFrame.cpp abi-090800/src/wp/ap/qnx/ap_QNXFrame.cpp --- abi-090800-ORIG/src/wp/ap/qnx/ap_QNXFrame.cpp Tue Aug 15 09:20:27 2000 +++ abi-090800/src/wp/ap/qnx/ap_QNXFrame.cpp Sun Sep 10 10:28:08 2000 @@ -463,7 +463,7 @@ // load a document into the current frame. // if no filename, create a new document. - AD_Document * pNewDoc = new PD_Document(); + AD_Document * pNewDoc = new PD_Document(getApp()); UT_ASSERT(pNewDoc); if (!szFilename || !*szFilename) diff -ru abi-090800-ORIG/src/wp/ap/unix/ap_UnixApp.cpp abi-090800/src/wp/ap/unix/ap_UnixApp.cpp --- abi-090800-ORIG/src/wp/ap/unix/ap_UnixApp.cpp Sun Aug 27 18:45:41 2000 +++ abi-090800/src/wp/ap/unix/ap_UnixApp.cpp Sun Sep 10 10:47:07 2000 @@ -1011,7 +1011,7 @@ // [filename] if (to) { - AP_Convert * conv = new AP_Convert(); + AP_Convert * conv = new AP_Convert(getApp()); conv->setVerbose(verbose); conv->convertTo(m_pArgs->m_argv[k], to); delete conv; diff -ru abi-090800-ORIG/src/wp/ap/unix/ap_UnixFrame.cpp abi-090800/src/wp/ap/unix/ap_UnixFrame.cpp --- abi-090800-ORIG/src/wp/ap/unix/ap_UnixFrame.cpp Thu Jul 27 20:24:11 2000 +++ abi-090800/src/wp/ap/unix/ap_UnixFrame.cpp Sun Sep 10 10:28:20 2000 @@ -425,7 +425,7 @@ // load a document into the current frame. // if no filename, create a new document. 
- AD_Document * pNewDoc = new PD_Document(); + AD_Document * pNewDoc = new PD_Document(getApp()); UT_ASSERT(pNewDoc); if (!szFilename || !*szFilename) diff -ru abi-090800-ORIG/src/wp/ap/unix/gnome/ap_UnixGnomeApp.cpp abi-090800/src/wp/ap/unix/gnome/ap_UnixGnomeApp.cpp --- abi-090800-ORIG/src/wp/ap/unix/gnome/ap_UnixGnomeApp.cpp Sun Aug 27 19:41:01 2000 +++ abi-090800/src/wp/ap/unix/gnome/ap_UnixGnomeApp.cpp Sun Sep 10 10:48:08 2000 @@ -277,7 +277,7 @@ } if (to) { - AP_Convert * conv = new AP_Convert(); + AP_Convert * conv = new AP_Convert(getApp()); conv->setVerbose(verbose); while ((file = poptGetArg (poptcon)) != NULL) { diff -ru abi-090800-ORIG/src/wp/ap/win/ap_Win32App.cpp abi-090800/src/wp/ap/win/ap_Win32App.cpp --- abi-090800-ORIG/src/wp/ap/win/ap_Win32App.cpp Mon Jul 10 14:56:16 2000 +++ abi-090800/src/wp/ap/win/ap_Win32App.cpp Sun Sep 10 10:48:18 2000 @@ -867,7 +867,7 @@ // [filename] if (to) { - AP_Convert * conv = new AP_Convert(); + AP_Convert * conv = new AP_Convert(getApp()); conv->setVerbose(verbose); conv->convertTo(m_pArgs->m_argv[k], to); delete conv; diff -ru abi-090800-ORIG/src/wp/ap/win/ap_Win32Frame.cpp abi-090800/src/wp/ap/win/ap_Win32Frame.cpp --- abi-090800-ORIG/src/wp/ap/win/ap_Win32Frame.cpp Tue Sep 5 02:01:32 2000 +++ abi-090800/src/wp/ap/win/ap_Win32Frame.cpp Sun Sep 10 10:28:42 2000 @@ -1112,7 +1112,7 @@ // load a document into the current frame. // if no filename, create a new document. 
- AD_Document * pNewDoc = new PD_Document(); + AD_Document * pNewDoc = new PD_Document(getApp()); UT_ASSERT(pNewDoc); if (!szFilename || !*szFilename) diff -ru abi-090800-ORIG/src/wp/ap/xp/ap_Convert.cpp abi-090800/src/wp/ap/xp/ap_Convert.cpp --- abi-090800-ORIG/src/wp/ap/xp/ap_Convert.cpp Wed Mar 8 14:35:15 2000 +++ abi-090800/src/wp/ap/xp/ap_Convert.cpp Sun Sep 10 10:44:46 2000 @@ -24,11 +24,14 @@ #include "ie_exp.h" #include "ut_types.h" +class XAP_App; + ////////////////////////////////////////////////////////////////// -AP_Convert::AP_Convert(void) +AP_Convert::AP_Convert(XAP_App *pApp) { m_iVerbose = 1; + m_pApp = pApp; } AP_Convert::~AP_Convert(void) @@ -42,7 +45,7 @@ const char * szTargetFilename, IEFileType targetFormat) { - PD_Document * pNewDoc = new PD_Document(); + PD_Document * pNewDoc = new PD_Document(getApp()); UT_Error error; UT_ASSERT(pNewDoc); diff -ru abi-090800-ORIG/src/wp/ap/xp/ap_Convert.h abi-090800/src/wp/ap/xp/ap_Convert.h --- abi-090800-ORIG/src/wp/ap/xp/ap_Convert.h Wed Mar 8 14:35:15 2000 +++ abi-090800/src/wp/ap/xp/ap_Convert.h Sun Sep 10 10:45:16 2000 @@ -28,7 +28,7 @@ class AP_Convert { public: - AP_Convert(void); + AP_Convert(XAP_App *pApp); ~AP_Convert(void); void convertTo(const char * szSourceFilename, @@ -44,9 +44,11 @@ const char * szTargetFormat); void setVerbose(int level); + XAP_App* getApp() { return m_pApp; } private: int m_iVerbose; + XAP_App *m_pApp; }; #endif /* AP_CONVERT_H */ diff -ru abi-090800-ORIG/src/wp/ap/xp/ap_Prefs_SchemeIds.h abi-090800/src/wp/ap/xp/ap_Prefs_SchemeIds.h --- abi-090800-ORIG/src/wp/ap/xp/ap_Prefs_SchemeIds.h Tue Jul 25 14:10:51 2000 +++ abi-090800/src/wp/ap/xp/ap_Prefs_SchemeIds.h Sun Sep 10 11:14:47 2000 @@ -55,6 +55,12 @@ #define AP_PREF_KEY_SpellCheckInternet "SpellCheckInternet" /* enable spell checking internet names {0,1} */ #define AP_PREF_DEFAULT_SpellCheckInternet "1" +#define AP_PREF_KEY_SpellCheckIgnoredWordsSave "SpellCheckIgnoredWordsSave" /* save ignored words list with doc */ 
+#define AP_PREF_DEFAULT_SpellCheckIgnoredWordsSave "1" + +#define AP_PREF_KEY_SpellCheckIgnoredWordsLoad "SpellCheckIgnoredWordsLoad" /* load ignored words list with doc */ +#define AP_PREF_DEFAULT_SpellCheckIgnoredWordsLoad "1" + #define AP_PREF_KEY_OptionsTabNumber "OptionsTabNumber" /* the page number of the currently shown page in the */ #define AP_PREF_DEFAULT_OptionsTabNumber "0" /* options dialog */ @@ -122,6 +128,8 @@ dcl(SpellCheckCaps) dcl(SpellCheckNumbers) dcl(SpellCheckInternet) +dcl(SpellCheckIgnoredWordsSave) +dcl(SpellCheckIgnoredWordsLoad) dcl(OptionsTabNumber) dcl(RulerUnits) dcl(RulerVisible) diff -ru abi-090800-ORIG/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp abi-090800/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp --- abi-090800-ORIG/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp Wed Jul 26 21:15:31 2000 +++ abi-090800/src/wp/impexp/xp/ie_exp_AbiWord_1.cpp Sun Sep 10 11:36:46 2000 @@ -21,6 +21,7 @@ #include "ut_types.h" #include "ut_bytebuf.h" #include "ut_base64.h" +#include "ut_debugmsg.h" #include "pt_Types.h" #include "ie_exp_AbiWord_1.h" #include "pd_Document.h" @@ -30,6 +31,7 @@ #include "px_CR_Span.h" #include "px_CR_Strux.h" #include "xap_App.h" +#include "ap_Prefs.h" #include "pd_Style.h" /*****************************************************************/ @@ -116,6 +118,7 @@ UT_Bool bNewLineAfter, PT_AttrPropIndex api); void _outputData(const UT_UCSChar * p, UT_uint32 length); void _handleStyles(void); + void _handleIgnoredWords(void); void _handleDataItems(void); PD_Document * m_pDocument; @@ -455,6 +458,7 @@ _handleStyles(); + _handleIgnoredWords(); } s_AbiWord_1_Listener::~s_AbiWord_1_Listener() @@ -620,6 +624,61 @@ if (bWroteOpenStyleSection) m_pie->write("</styles>\n"); + + return; +} + +void s_AbiWord_1_Listener::_handleIgnoredWords(void) +{ + UT_ASSERT(m_pDocument); + XAP_App *pApp = m_pDocument->getApp(); + UT_ASSERT(pApp); + XAP_Prefs *pPrefs = pApp->getPrefs(); + UT_ASSERT(pPrefs); + + UT_Bool saveIgnores; + pPrefs->getPrefsValueBool((XML_Char 
*)AP_PREF_KEY_SpellCheckIgnoredWordsSave, &saveIgnores); + UT_DEBUGMSG(("Ignored words list %s being saved with document\n", saveIgnores?"is":"is not")); + if (!saveIgnores) return; // don't bother + UT_Bool bWroteOpenIgnoredWordsSection = UT_FALSE; + + const UT_UCSChar *word; + for (UT_uint32 i = 0; m_pDocument->enumIgnores(i, &word); i++) + { + if (!bWroteOpenIgnoredWordsSection) + { + m_pie->write("<ignoredwords>\n"); + bWroteOpenIgnoredWordsSection = UT_TRUE; + } + m_pie->write("<iw>"); + for (UT_uint32 udex=0; word[udex]; ++udex) + { + UT_UCSChar ch = word[udex]; + switch (ch) + { + case '&': m_pie->write("&amp;"); break; + case '<': m_pie->write("&lt;"); break; + case '>': m_pie->write("&gt;"); break; + case '"': m_pie->write("&quot;"); break; + default: + char utb[100]; + if (ch < ' ' || ch >= 128) + { + sprintf(utb, "&#x%x;", ch); + } + else + { + utb[0] = (char)ch; + utb[1] = 0; + } + m_pie->write(utb); + } + } + m_pie->write("</iw>\n"); + } + + if (bWroteOpenIgnoredWordsSection) + m_pie->write("</ignoredwords>\n"); return; } diff -ru abi-090800-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp abi-090800/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp --- abi-090800-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp Wed Jul 26 21:15:31 2000 +++ abi-090800/src/wp/impexp/xp/ie_imp_AbiWord_1.cpp Sun Sep 10 11:49:28 2000 @@ -33,6 +33,8 @@ #include "ie_types.h" #include "pd_Document.h" #include "ut_bytebuf.h" +#include "xap_Prefs.h" +#include "ap_Prefs.h" /***************************************************************** ****************************************************************** @@ -259,6 +261,8 @@ #define TT_PAGEBREAK 11 // a forced page-break <pbr> #define TT_STYLESECTION 12 // a style section <styles> #define TT_STYLE 13 // a style <s> within a style section +#define TT_IGNOREDWORDS 14 // an ignored words section <ignoredwords> +#define TT_IGNOREDWORD 15 // a word <iw> within an ignored words section struct _TokenTable { @@ -297,6 +301,8 @@ { "pbr", TT_PAGEBREAK }, { 
"styles", TT_STYLESECTION }, { "s", TT_STYLE }, + { "ignoredwords", TT_IGNOREDWORDS }, + { "iw", TT_IGNOREDWORD }, { "*", TT_OTHER }}; // must be last #define TokenTableSize ((sizeof(s_Tokens)/sizeof(s_Tokens[0]))) @@ -339,6 +345,12 @@ xxx_UT_DEBUGMSG(("startElement: %s\n", name)); X_EatIfAlreadyError(); // xml parser keeps running until buffer consumed + + UT_ASSERT(m_pDocument); + XAP_App *pApp = m_pDocument->getApp(); + UT_ASSERT(pApp); + XAP_Prefs *pPrefs = pApp->getPrefs(); + UT_ASSERT(pPrefs); UT_uint32 tokenIndex = s_mapNameToToken(name); switch (s_Tokens[tokenIndex].m_type) @@ -456,6 +468,23 @@ X_CheckError(m_pDocument->appendStyle(atts)); return; + case TT_IGNOREDWORDS: + X_VerifyParseState(_PS_Doc); + // This caches the preference value. Our assumption is that the ignored words + // list is small with respect to the document size, but nothing forces that. + // The scheme is to parse the ignored words list as usual, but if we don't want + // it loaded from the file, it just isn't added to the in-memory ignored words + // list. The cached preference value keeps us from looking it up for each word. 
+ pPrefs->getPrefsValueBool((XML_Char *)AP_PREF_KEY_SpellCheckIgnoredWordsLoad, &m_bLoadIgnoredWords); + + m_parseState = _PS_IgnoredWordsSec; + return; + + case TT_IGNOREDWORD: + X_VerifyParseState(_PS_IgnoredWordsSec); + m_parseState = _PS_IgnoredWordsItem; + return; + case TT_OTHER: default: UT_DEBUGMSG(("Unknown tag [%s]\n",name)); @@ -564,6 +593,16 @@ m_parseState = _PS_StyleSec; return; + case TT_IGNOREDWORDS: + X_VerifyParseState(_PS_IgnoredWordsSec); + m_parseState = _PS_Doc; + return; + + case TT_IGNOREDWORD: + X_VerifyParseState(_PS_IgnoredWordsItem); + m_parseState = _PS_IgnoredWordsSec; + return; + case TT_OTHER: default: UT_DEBUGMSG(("Unknown end tag [%s]\n",name)); @@ -591,6 +630,7 @@ } case _PS_Block: + case _PS_IgnoredWordsItem: { UT_ASSERT(sizeof(XML_Char) == sizeof(UT_Byte)); UT_ASSERT(sizeof(XML_Char) != sizeof(UT_UCSChar)); @@ -600,19 +640,17 @@ // [] convert CRLF to SP. // [] convert CR to SP. // [] convert LF to SP. + // ignored words processing doesn't care about the + // white-space stuff, but it does no harm UT_Byte * ss = (UT_Byte *)s; - UT_UCSChar buf[1024]; + UT_UCSChar _buf[1024], *buf = _buf; + // len is an upper bound on the length of the decoded stuff + if (len > 1000) buf = new UT_UCSChar[len+1]; int bufLen = 0; for (int k=0; k<len; k++) { - if (bufLen == NrElements(buf)) // pump it out in chunks - { - X_CheckError(m_pDocument->appendSpan(buf,bufLen)); - bufLen = 0; - } - if ((ss[k] < 0x80) && (m_lenCharDataSeen > 0)) { // is it us-ascii and we are in a UTF-8 @@ -674,10 +712,24 @@ } } - // flush out the last piece of a buffer + // flush out the buffer if (bufLen > 0) - X_CheckError(m_pDocument->appendSpan(buf,bufLen)); + { + switch (m_parseState) + { + case _PS_Block: + X_CheckError(m_pDocument->appendSpan(buf,bufLen)); + break; + case _PS_IgnoredWordsItem: + if (m_bLoadIgnoredWords) X_CheckError(m_pDocument->appendIgnore(buf,bufLen)); + break; + default: + UT_ASSERT(UT_SHOULD_NOT_HAPPEN); + break; + } + } + if (buf != _buf) delete 
buf; return; } diff -ru abi-090800-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.h abi-090800/src/wp/impexp/xp/ie_imp_AbiWord_1.h --- abi-090800-ORIG/src/wp/impexp/xp/ie_imp_AbiWord_1.h Wed Jul 26 21:15:31 2000 +++ abi-090800/src/wp/impexp/xp/ie_imp_AbiWord_1.h Sun Sep 10 11:41:42 2000 @@ -82,11 +82,14 @@ _PS_DataSec, _PS_DataItem, _PS_StyleSec, - _PS_Style + _PS_Style, + _PS_IgnoredWordsSec, + _PS_IgnoredWordsItem } ParseState; UT_Error m_error; ParseState m_parseState; + UT_Bool m_bLoadIgnoredWords; XML_Char m_charDataSeen[4]; UT_uint32 m_lenCharDataSeen; UT_uint32 m_lenCharDataExpected;



This archive was generated by hypermail 2b25 : Sun Sep 10 2000 - 23:59:44 CDT