patch: ignored word maintenance


Subject: patch: ignored word maintenance
From: WJCarpenter (bill-abisource@carpenter.ORG)
Date: Mon Sep 04 2000 - 23:11:00 CDT


This patch, against 090100 sources, fixes up various things in the
maintenance of ispell's ignored words list.

1. All *alloc()/free() pairs were changed to new/delete. Also eliminated
the need for heap allocation of the scratch key buffer in the most common
cases: keys of up to 145 characters now use a stack buffer. Ignored word
maintenance isn't exactly performance critical, but every little bit helps.

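2. The signature of enumIgnores() was changed because the old one could
never return a word: it took the output pointer by value
(const UT_UCSChar *), so assigning to it inside the function had no
effect on the caller. It now takes a pointer to the pointer
(const UT_UCSChar **). (enumIgnores() was not previously called, but is
called now for a UT_DEBUGMSG() in the AD_Document destructor.)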

3. Fixed an interesting problem with duplicate entries. When
you "ignore all" on some word, spell-check queues all the blocks in a
document for re-spell-checking. That's done in the background. For a
long document, it's easy to do "ignore all" on another instance of the
same word before the squiggle is removed. That leads to an attempt at
adding a duplicate entry to the hash table, and that leads to an
assert. The assert looks harmless to me, but since hash tables are
used all over the place, I didn't remove it. Instead, I made ignored
word maintenance check for an entry before trying to add it.

-- 
bill@carpenter.ORG (WJCarpenter)    PGP 0x91865119
38 95 1B 69 C9 C6 3D 25    73 46 32 04 69 D6 ED F3

Buy my house in Woodinville (near Seattle): <http://www.johnlscott.com/57544> <http://www.carpenter.org/bill/house.jpg>

diff -ru abi-090100-ORIG/src/af/xap/xp/xad_Document.cpp abi-090100/src/af/xap/xp/xad_Document.cpp
--- abi-090100-ORIG/src/af/xap/xp/xad_Document.cpp Mon Jun 26 14:46:01 2000
+++ abi-090100/src/af/xap/xp/xad_Document.cpp Sun Sep 3 22:23:38 2000
@@ -37,13 +37,18 @@
 AD_Document::~AD_Document()
 {
     UT_ASSERT(m_iRefCount == 0);
 
+    for (UT_uint32 i = 0; i < (unsigned)m_pIgnoreList->getEntryCount(); i++)
+    {
+        // not careful results checking, etc, in this loop, but it's just for debugging anyhow
+        const UT_UCSChar *word = 0;
+        char copy[1000];
+        enumIgnores(i, &word);
+        UT_DEBUGMSG(("AD_Document::~AD_Document(), ignored spell word \"%s\"\n", UT_UCS_strcpy_to_char(copy, word)));
+    }
     // free all of the words on the list first
-    for (int i = 0; i < m_pIgnoreList->getEntryCount(); i++) {
-        UT_HashEntry * pHE = m_pIgnoreList->getNthEntry(i);
-        FREEP(pHE->pData);
-    }
-    // the free the ignore list
+    clearIgnores();
+    // then free the ignore list
     DELETEP(m_pIgnoreList);
 
     // NOTE: let subclass clean up m_szFilename, so it matches the alloc mechanism
@@ -78,16 +83,9 @@
 {
     UT_ASSERT(m_pIgnoreList);
 
-    char * key = (char *) calloc(len+1, sizeof(char));
-    UT_UCSChar * copy = (UT_UCSChar *) calloc(len+1, sizeof(UT_UCSChar));
-
-    if (!key || !copy)
-    {
-        UT_DEBUGMSG(("mem failure adding word to dictionary\n"));
-        FREEP(key);
-        FREEP(copy);
-        return UT_FALSE;
-    }
+    char _key[150], *key = _key;
+    if (len > 145) key = new char[len + 1];
+    UT_UCSChar *copy = new UT_UCSChar[len + 1];
 
     for (UT_uint32 i = 0; i < len; i++)
     {
@@ -98,10 +96,23 @@
         key[i] = (char) currentChar;
         copy[i] = currentChar;
     }
+    key[len] = 0;
+    copy[len] = 0;
 
-    UT_sint32 iRes = m_pIgnoreList->addEntry(key, NULL, (void*) copy);
+    UT_sint32 iRes = -1;
 
-    FREEP(key);
+    if (!isIgnore(pWord, len))
+    {
+        // If it's already on the ignored word list, don't add it again.
+        // This can happen if you are looking at a longish document. You
+        // "ignore all" a word, but spell-check doesn't get around to removing
+        // the squiggles in the background for a while. Then, you "ignore all"
+        // that word (or another instance of it) again, and ka-bloom, the
+        // hash table stuff asserts on a duplicate entry.
+        iRes = m_pIgnoreList->addEntry(key, NULL, (void*) copy);
+    }
+
+    if (key != _key) DELETEP(key);
 
     if (iRes == 0)
         return UT_TRUE;
@@ -113,22 +124,22 @@
 {
     UT_ASSERT(m_pIgnoreList);
 
-    char * key = (char*) calloc(len+1, sizeof(char));
-    if (!key)
-    {
-        UT_DEBUGMSG(("mem failure looking up word in ignore all list\n"));
-        FREEP(key);
-        return UT_FALSE;
-    }
+    char _key[150], *key = _key;
+    if (len > 145) key = new char[len + 1];
 
     for (UT_uint32 i = 0; i < len; i++)
     {
-        key[i] = (char) pWord[i];
+        UT_UCSChar currentChar;
+        currentChar = pWord[i];
+        // convert smart quote apostrophe to ASCII single quote
+        if (currentChar == UCS_RQUOTE) currentChar = '\'';
+        key[i] = (char) currentChar;
     }
+    key[len] = 0;
 
     UT_HashEntry * pHE = m_pIgnoreList->findEntry(key);
 
-    FREEP(key);
+    if (key != _key) DELETEP(key);
 
     if (pHE != NULL)
         return UT_TRUE;
@@ -137,39 +148,39 @@
 }
 
-UT_Bool AD_Document::enumIgnores(UT_uint32 k, const UT_UCSChar * pszWord) const
+UT_Bool AD_Document::enumIgnores(UT_uint32 k, const UT_UCSChar **pszWord) const
 {
-    UT_ASSERT(m_pIgnoreList);
+    UT_ASSERT(m_pIgnoreList);
 
-    if ((int)k >= m_pIgnoreList->getEntryCount())
-    {
-        pszWord = NULL;
-        return UT_FALSE;
-    }
-
-    UT_HashEntry * pHE = m_pIgnoreList->getNthEntry(k);
+    if ((int)k >= m_pIgnoreList->getEntryCount())
+    {
+        *pszWord = NULL;
+        return UT_FALSE;
+    }
 
-    UT_ASSERT(pHE);
+    UT_HashEntry * pHE = m_pIgnoreList->getNthEntry(k);
+
+    UT_ASSERT(pHE);
 
-    pszWord = (UT_UCSChar*) pHE->pData;
-
-    return UT_TRUE;
+    *pszWord = (UT_UCSChar*) pHE->pData;
+    return UT_TRUE;
 }
 
 UT_Bool AD_Document::clearIgnores(void)
 {
-    UT_ASSERT(m_pIgnoreList);
-
-    for (int i = 0; i < m_pIgnoreList->getEntryCount(); i++) {
-        UT_HashEntry * pHE = m_pIgnoreList->getNthEntry(i);
-        FREEP(pHE->pData);
-    }
+    UT_ASSERT(m_pIgnoreList);
+
+    for (int i = 0; i < m_pIgnoreList->getEntryCount(); i++)
+    {
+        UT_HashEntry * pHE = m_pIgnoreList->getNthEntry(i);
+        DELETEP((UT_UCSChar *)pHE->pData);
+    }
 
-    DELETEP(m_pIgnoreList);
+    DELETEP(m_pIgnoreList);
 
-    m_pIgnoreList = new UT_AlphaHashTable(11);
+    m_pIgnoreList = new UT_AlphaHashTable(11);
 
-    UT_ASSERT(m_pIgnoreList);
+    UT_ASSERT(m_pIgnoreList);
 
-    return UT_TRUE;
+    return UT_TRUE;
 }
diff -ru abi-090100-ORIG/src/af/xap/xp/xad_Document.h abi-090100/src/af/xap/xp/xad_Document.h
--- abi-090100-ORIG/src/af/xap/xp/xad_Document.h Wed Jul 12 21:38:28 2000
+++ abi-090100/src/af/xap/xp/xad_Document.h Fri Sep 1 22:39:21 2000
@@ -55,7 +55,7 @@
     // "ignore all" list for spell check
     UT_Bool appendIgnore(const UT_UCSChar * pWord, UT_uint32 len);
     UT_Bool isIgnore(const UT_UCSChar * pWord, UT_uint32 len) const;
-    UT_Bool enumIgnores(UT_uint32 k, const UT_UCSChar * pszWord) const;
+    UT_Bool enumIgnores(UT_uint32 k, const UT_UCSChar ** pszWord) const;
     UT_Bool clearIgnores(void);
 
 protected:



This archive was generated by hypermail 2b25 : Tue Sep 05 2000 - 12:03:30 CDT