patch: at long last, smart quotes!


Subject: patch: at long last, smart quotes!
From: WJCarpenter (bill-abisource@carpenter.ORG)
Date: Tue Aug 22 2000 - 10:27:38 CDT


The attached patch, against 082100 sources, implements on-the-fly
promotion of quote characters to curly "smart quotes". Everything is
XP. The patch is a bit larger than expected because I renamed some
spelling-specific functions to make that affiliation obvious.

I will have some notes on the implementation in a separate message.

-- 
bill@carpenter.ORG (WJCarpenter)    PGP 0x91865119
38 95 1B 69 C9 C6 3D 25    73 46 32 04 69 D6 ED F3

diff -ru abi-082100-ORIG/src/af/util/xp/ut_string.cpp abi-082100/src/af/util/xp/ut_string.cpp --- abi-082100-ORIG/src/af/util/xp/ut_string.cpp Mon Aug 7 15:22:38 2000 +++ abi-082100/src/af/util/xp/ut_string.cpp Mon Aug 21 23:37:29 2000 @@ -888,3 +888,42 @@ return out; #endif } + +UT_Bool UT_isSmartQuotableCharacter(UT_UCSChar c) +{ + // TODO: this is anglo-centric; really need a locale argument or + // TODO: something to get smart quote rules for the rest of the world + UT_Bool result; + switch (c) + { + case '"': + case '`': + case '\'': + result = UT_TRUE; + break; + default: + result = UT_FALSE; + break; + } + return (result); +} + +UT_Bool UT_isSmartQuotedCharacter(UT_UCSChar c) +{ + // TODO: this is anglo-centric; really need a locale argument or + // TODO: something to get smart quote rules for the rest of the world + UT_Bool result; + switch (c) + { + case UCS_LQUOTE: + case UCS_RQUOTE: + case UCS_LDBLQUOTE: + case UCS_RDBLQUOTE: + result = UT_TRUE; + break; + default: + result = UT_FALSE; + break; + } + return (result); +} diff -ru abi-082100-ORIG/src/af/util/xp/ut_string.h abi-082100/src/af/util/xp/ut_string.h --- abi-082100-ORIG/src/af/util/xp/ut_string.h Sun Aug 6 11:30:03 2000 +++ abi-082100/src/af/util/xp/ut_string.h Mon Aug 21 23:37:29 2000 @@ -94,11 +94,16 @@ XML_Char * UT_encodeUTF8char(UT_UCSChar cIn); XML_Char * UT_decodeXMLstring(XML_Char *pcIn); -#define UT_UCS_isdigit(x) (((x) >= '0') && ((x) <= '9')) +UT_Bool UT_isSmartQuotableCharacter(UT_UCSChar c); +UT_Bool UT_isSmartQuotedCharacter(UT_UCSChar c); + +#define UT_UCS_isdigit(x) (((x) >= '0') && ((x) <= '9')) // TODO: make UNICODE-wise #define UT_UCS_isupper(x) (((x) >= 'A') && ((x) <= 'Z')) // HACK: not UNICODE-safe #define UT_UCS_islower(x) (((x) >= 'a') && ((x) <= 'z')) // HACK: not UNICODE-safe #define UT_UCS_isalpha(x) (UT_UCS_isupper(x) || UT_UCS_islower(x)) // HACK: not UNICODE-safe #define UT_UCS_isalnum(x) (UT_UCS_isalpha(x) || UT_UCS_isdigit(x)) // HACK: not UNICODE-safe +#define 
UT_UCS_isspace(x) (((x)==' ' || ((x)=='\t') || ((x)=='\f'))) // HACK: not UNICODE safe +#define UT_UCS_ispunct(x) ((!UT_UCS_isspace(x) && !UT_UCS_isalnum(x) && (x)>' ')) // HACK: not UNICODE safe #ifdef WIN32 #define snprintf _snprintf diff -ru abi-082100-ORIG/src/text/fmt/xp/fl_BlockLayout.cpp abi-082100/src/text/fmt/xp/fl_BlockLayout.cpp --- abi-082100-ORIG/src/text/fmt/xp/fl_BlockLayout.cpp Sun Aug 20 21:06:38 2000 +++ abi-082100/src/text/fmt/xp/fl_BlockLayout.cpp Mon Aug 21 23:37:29 2000 @@ -1689,17 +1689,17 @@ _moveSquiggles(iOffset, chg); // deal with pending word, if any - if (m_pLayout->isPendingWord()) + if (m_pLayout->isPendingWordForSpell()) { - if (!m_pLayout->touchesPendingWord(this, iOffset, 0)) + if (!m_pLayout->touchesPendingWordForSpell(this, iOffset, 0)) { // not affected by insert, so check it - fl_PartOfBlock* pPending = m_pLayout->getPendingWord(); + fl_PartOfBlock* pPending = m_pLayout->getPendingWordForSpell(); if (pPending->iOffset > iOffset) pPending->iOffset = (UT_uint32)((UT_sint32)pPending->iOffset + chg); - m_pLayout->checkPendingWord(); + m_pLayout->checkPendingWordForSpell(); } } @@ -1729,17 +1729,17 @@ _moveSquiggles(0, chg, pNewBL); // CF: math inside this function // deal with previously pending word, if any - if (m_pLayout->isPendingWord()) + if (m_pLayout->isPendingWordForSpell()) { - if (!m_pLayout->touchesPendingWord(this, iOffset, 0)) + if (!m_pLayout->touchesPendingWordForSpell(this, iOffset, 0)) { // not affected by insert, so check it - fl_PartOfBlock* pPending = m_pLayout->getPendingWord(); + fl_PartOfBlock* pPending = m_pLayout->getPendingWordForSpell(); if (pPending->iOffset > iOffset) pPending->iOffset = (UT_uint32)((UT_sint32)pPending->iOffset + chg); - m_pLayout->checkPendingWord(); + m_pLayout->checkPendingWordForSpell(); } } @@ -1774,17 +1774,17 @@ _moveSquiggles(iOffset, chg); // deal with pending word, if any - if (m_pLayout->isPendingWord()) + if (m_pLayout->isPendingWordForSpell()) { - if 
(!m_pLayout->touchesPendingWord(this, iOffset, chg)) + if (!m_pLayout->touchesPendingWordForSpell(this, iOffset, chg)) { // not affected by delete, so check it - fl_PartOfBlock* pPending = m_pLayout->getPendingWord(); + fl_PartOfBlock* pPending = m_pLayout->getPendingWordForSpell(); if (pPending->iOffset > iOffset) pPending->iOffset = (UT_uint32)((UT_sint32)pPending->iOffset + chg); - m_pLayout->checkPendingWord(); + m_pLayout->checkPendingWordForSpell(); } } @@ -1792,7 +1792,7 @@ _recalcPendingWord(iOffset, chg); // check the newly pending word -// m_pLayout->checkPendingWord(); +// m_pLayout->checkPendingWordForSpell(); #else m_pLayout->queueBlockForSpell(this); #endif @@ -1808,17 +1808,17 @@ _moveSquiggles(0, chg, pPrevBL); // deal with previously pending word, if any - if (m_pLayout->isPendingWord()) + if (m_pLayout->isPendingWordForSpell()) { - if (!m_pLayout->touchesPendingWord(this, iOffset, chg)) + if (!m_pLayout->touchesPendingWordForSpell(this, iOffset, chg)) { // not affected by delete, so check it - fl_PartOfBlock* pPending = m_pLayout->getPendingWord(); + fl_PartOfBlock* pPending = m_pLayout->getPendingWordForSpell(); if (pPending->iOffset > iOffset) pPending->iOffset = (UT_uint32)((UT_sint32)pPending->iOffset + chg); - m_pLayout->checkPendingWord(); + m_pLayout->checkPendingWordForSpell(); } } @@ -1837,7 +1837,7 @@ // If spell-check-as-you-type is off, we don't want a pending word at all if (!m_pLayout->getAutoSpellCheck()) { - m_pLayout->setPendingWord(NULL, NULL); + m_pLayout->setPendingWordForSpell(NULL, NULL); return; } @@ -1921,9 +1921,9 @@ fl_PartOfBlock* pPending = NULL; UT_Bool bNew = UT_FALSE; - if (m_pLayout->isPendingWord()) + if (m_pLayout->isPendingWordForSpell()) { - pPending = m_pLayout->getPendingWord(); + pPending = m_pLayout->getPendingWordForSpell(); UT_ASSERT(pPending); } @@ -1940,13 +1940,13 @@ pPending->iLength = iLen; if (bNew) - m_pLayout->setPendingWord(this, pPending); + m_pLayout->setPendingWordForSpell(this, pPending); } } 
else { // not pending any more - m_pLayout->setPendingWord(NULL, NULL); + m_pLayout->setPendingWordForSpell(NULL, NULL); } } @@ -2636,6 +2636,13 @@ UT_uint32 iNormalBase = 0; UT_Bool bNormal = UT_FALSE; UT_uint32 i; + UT_uint32 _sqlist[100], *sqlist = _sqlist; + UT_uint32 sqcount = 0; + if (sizeof(_sqlist) / sizeof(_sqlist[0]) < len) + { + sqlist = new UT_uint32(len); + } + xxx_UT_DEBUGMSG(("fl_BlockLayout::doclistener_insertSpan(), len=%d, c=|%c|\n", len, pChars[0])); for (i=0; i<len; i++) { switch (pChars[i]) @@ -2679,6 +2686,13 @@ break; default: + if ((i != len-1) && UT_isSmartQuotableCharacter(pChars[i])) + { + // accumulate smart quote candidates and deal with them + // as a bunch below after the final text insertion has + // been dealt with + sqlist[sqcount++] = blockOffset + i; + } if (!bNormal) { bNormal = UT_TRUE; @@ -2705,6 +2719,26 @@ pView->_setPoint(pcrs->getPosition()+len); pView->notifyListeners(AV_CHG_FMTCHAR); // TODO verify that this is necessary. } + + if (m_pLayout->hasBackgroundCheckReason(FL_DocLayout::bgcrSmartQuotes)) + { + fl_BlockLayout *sq_bl = m_pLayout->getPendingBlockForSmartQuote(); + UT_uint32 sq_of = m_pLayout->getOffsetForSmartQuote(); + m_pLayout->setPendingSmartQuote(NULL, 0); + if (sq_bl) + { + m_pLayout->considerSmartQuoteCandidateAt(sq_bl, sq_of); + } + for (unsigned int sdex=0; sdex<sqcount; ++sdex) + { + m_pLayout->considerSmartQuoteCandidateAt(this, sqlist[sdex]); + } + if (UT_isSmartQuotableCharacter(pChars[len - 1])) + { + m_pLayout->setPendingSmartQuote(this, blockOffset + len - 1); + } + } + if (sqlist != _sqlist) delete(sqlist); if (m_pLayout->getAutoSpellCheck()) _insertSquiggles(blockOffset, len); diff -ru abi-082100-ORIG/src/text/fmt/xp/fl_BlockLayout.h abi-082100/src/text/fmt/xp/fl_BlockLayout.h --- abi-082100-ORIG/src/text/fmt/xp/fl_BlockLayout.h Sun Aug 20 21:06:38 2000 +++ abi-082100/src/text/fmt/xp/fl_BlockLayout.h Mon Aug 21 23:37:29 2000 @@ -454,7 +454,7 @@ /* This class is used to represent a part of the 
block. Pointers to this class are the things contained in m_vecSquiggles and in - FL_DocLayout::m_pPendingWord. + FL_DocLayout::m_pPendingWordForSpell */ class fl_PartOfBlock { diff -ru abi-082100-ORIG/src/text/fmt/xp/fl_DocLayout.cpp abi-082100/src/text/fmt/xp/fl_DocLayout.cpp --- abi-082100-ORIG/src/text/fmt/xp/fl_DocLayout.cpp Tue Aug 15 17:45:54 2000 +++ abi-082100/src/text/fmt/xp/fl_DocLayout.cpp Mon Aug 21 23:37:29 2000 @@ -27,6 +27,7 @@ #include "fl_SectionLayout.h" #include "fl_BlockLayout.h" #include "fp_Page.h" +#include "fp_Run.h" #include "fv_View.h" #include "pd_Document.h" #include "pp_Property.h" @@ -38,7 +39,7 @@ #include "ut_debugmsg.h" #include "ut_assert.h" #include "ut_timer.h" - +#include "ut_string.h" #define REDRAW_UPDATE_MSECS 500 @@ -50,8 +51,10 @@ m_pG = pG; m_pView = NULL; m_pBackgroundCheckTimer = NULL; - m_pPendingBlock = NULL; - m_pPendingWord = NULL; + m_pPendingBlockForSpell = NULL; + m_pPendingWordForSpell = NULL; + m_pPendingBlockForSmartQuote = NULL; + m_uOffsetForSmartQuote = 0; m_pFirstSection = NULL; m_pLastSection = NULL; m_bSpellCheckCaps = UT_TRUE; @@ -100,7 +103,7 @@ } DELETEP(m_pBackgroundCheckTimer); - DELETEP(m_pPendingWord); + DELETEP(m_pPendingWordForSpell); if (m_pRedrawUpdateTimer) { @@ -585,11 +588,26 @@ // to FALSE. This means that it is the user setting it. That's good. m_pView->draw(NULL); // A pending word would be bad. Not sure why it's not ignored once autospell is off, but for now it should definattely be annulled. - setPendingWord(NULL, NULL); + setPendingWordForSpell(NULL, NULL); } } } +void FL_DocLayout::_toggleAutoSmartQuotes(UT_Bool bSQ) +{ + setPendingSmartQuote(NULL, 0); // avoid surprises + if (bSQ) + { + addBackgroundCheckReason(bgcrSmartQuotes); + } + else + { + removeBackgroundCheckReason(bgcrSmartQuotes); + } + + UT_DEBUGMSG(("FL_DocLayout::_toggleAutoSmartQuotes(%s)\n", bSQ ? 
"UT_TRUE" : "UT_FALSE" )); +} + void FL_DocLayout::_backgroundCheck(UT_Timer * pTimer) { UT_ASSERT(pTimer); @@ -729,31 +747,31 @@ } } -void FL_DocLayout::setPendingWord(fl_BlockLayout *pBlock, fl_PartOfBlock* pWord) +void FL_DocLayout::setPendingWordForSpell(fl_BlockLayout *pBlock, fl_PartOfBlock* pWord) { - if ((pBlock == m_pPendingBlock) && - (pWord == m_pPendingWord)) + if ((pBlock == m_pPendingBlockForSpell) && + (pWord == m_pPendingWordForSpell)) return; - UT_ASSERT(!m_pPendingBlock || !pBlock); + UT_ASSERT(!m_pPendingBlockForSpell || !pBlock); - if (pBlock && m_pPendingBlock && m_pPendingWord) + if (pBlock && m_pPendingBlockForSpell && m_pPendingWordForSpell) { UT_ASSERT(pWord); } // when clobbering prior POB, make sure we don't leak it - FREEP(m_pPendingWord); + FREEP(m_pPendingWordForSpell); - m_pPendingBlock = pBlock; - m_pPendingWord = pWord; + m_pPendingBlockForSpell = pBlock; + m_pPendingWordForSpell = pWord; } -UT_Bool FL_DocLayout::checkPendingWord(void) +UT_Bool FL_DocLayout::checkPendingWordForSpell(void) { UT_Bool bUpdate = UT_FALSE; - if (!m_pPendingBlock) + if (!m_pPendingBlockForSpell) return bUpdate; if(m_pView->dontSpellCheckRightNow() == UT_TRUE) @@ -762,40 +780,40 @@ } // check pending word - UT_ASSERT(m_pPendingWord); - bUpdate = m_pPendingBlock->checkWord(m_pPendingWord); + UT_ASSERT(m_pPendingWordForSpell); + bUpdate = m_pPendingBlockForSpell->checkWord(m_pPendingWordForSpell); - m_pPendingWord = NULL; // NB: already freed by checkWord + m_pPendingWordForSpell = NULL; // NB: already freed by checkWord // not pending any more - setPendingWord(NULL, NULL); + setPendingWordForSpell(NULL, NULL); return bUpdate; } -UT_Bool FL_DocLayout::isPendingWord(void) const +UT_Bool FL_DocLayout::isPendingWordForSpell(void) const { - return (m_pPendingBlock ? UT_TRUE : UT_FALSE); + return (m_pPendingBlockForSpell ? 
UT_TRUE : UT_FALSE); } -UT_Bool FL_DocLayout::touchesPendingWord(fl_BlockLayout *pBlock, +UT_Bool FL_DocLayout::touchesPendingWordForSpell(fl_BlockLayout *pBlock, UT_uint32 iOffset, UT_sint32 chg) const { UT_uint32 len = (chg < 0) ? -chg : 0; - if (!m_pPendingBlock) + if (!m_pPendingBlockForSpell) return UT_FALSE; UT_ASSERT(pBlock); // are we in the same block? - if (m_pPendingBlock != pBlock) + if (m_pPendingBlockForSpell != pBlock) return UT_FALSE; - UT_ASSERT(m_pPendingWord); + UT_ASSERT(m_pPendingWordForSpell); - return m_pPendingWord->doesTouch(iOffset, len); + return m_pPendingWordForSpell->doesTouch(iOffset, len); } void FL_DocLayout::addSection(fl_DocSectionLayout* pSL) @@ -940,6 +958,9 @@ // TODO: recheck document ; } + + pPrefs->getPrefsValueBool( (XML_Char *)XAP_PREF_KEY_SmartQuotesEnable, &b ); + pDocLayout->_toggleAutoSmartQuotes( b ); } void FL_DocLayout::recheckIgnoredWords() @@ -1017,5 +1038,424 @@ // TODO return PageSize initialized by prefs. return fp_PageSize(fp_PageSize::Letter); +} + +void FL_DocLayout::setPendingSmartQuote(fl_BlockLayout *bl, UT_uint32 of) +{ + UT_DEBUGMSG(("FL_DocLayout::setPendingSmartQuote(%x, %d)\n", bl, of)); + m_pPendingBlockForSmartQuote = bl; + m_uOffsetForSmartQuote = of; +} + +/* wjc sez.... + +This algorithm is based on my observation of how people actually use +quotation marks, sometimes in contravention of generally accepted +principles of punctuation. It is certainly also true that my +observations are overwhelmingly of American English text, with a +smattering of various other languages observed from time to time. I +don't believe that any algorithm for this can ever be perfect. There +are too many infrequently-occurring but legitimate cases where a user +might want something else. FWIW, I haven't tested out the specifics +of the smart quote algorithm in ThatOtherWordProcessor. 
+ +Some terms for the purpose of this discussion (I'm open to plenty of +advice on what specific items should fit in each of these classes): + +sqBREAK A structural break in a document. For example, a paragraph + break, a column break, a page break, the beginning or end of a + document, etc. Does not include font, size, bold/italic/underline + changes (which are completely ignored for the purposes of this + algorithm). + +sqFOLLOWPUNCT A subset of layman's "punctuation". I include only + things that can normally occur after a quote mark with no intervening + white space. Includes period, exclamation point, question mark, + semi-colon, colon, comma (but not parentheses, square and curly + brackets, which are treated specially below). There may be a few + others that aren't on the kinds of keyboards I use, and there are + certainly Latin1 and other locale-specific variants, but the point + is that there are lots of random non-alphanumerics which aren't + included in *PUNCT for this algorithm. + +sqOPENPUNCT The opening half of pairwise, non-quote punctuation. Open + parenthesis, open square bracket, open curly brace. + +sqCLOSEPUNCT The closing half of pairwise, non-quote punctuation. Close + parenthesis, close square bracket, close curly brace. + +[[The idea about open and close punctuation was found in a mid-1980s +note by Dave Dunham, brought to my attention by Leonard Rosenthol +<leonardr@lazerware.com>.]] + +sqOTHERPUNCT Punctuation which is not sqFOLLOWPUNCT, sqOPENPUNCT, or + sqCLOSEPUNCT. + +sqALPHA Alphabetic characters in the C isalpha() sense, but there are + certainly some non-ASCII letter characters which belong in this + bucket, too. + +sqWHITE White space characters in the C isspace() sense. + +QUOTE Any of ASCII double quote, ASCII quote (which many people call + the ASCII single quote or the ASCII apostrophe), or ASCII backquote. 
+ I take it as given that a significant minority of people randomly or + systematically interchange their use of ASCII quote and ASCII + backquote, so I treat them the same in the algorithm. The majority + of people use ASCII quote for both opening and closing single quote. + +PARITY Whether a quote is single or double. For ease of description, + I'll say that the parity of single and double quotes are opposites + of each other. When QUOTEs are converted to curly form, the parity + never changes. + +================================================================ + +Given a QUOTE character, these conditions/rules are logically tested in +order: + +0. OK, first an easy exception case: If ASCII (single) quote (but not +ASCII backquote) appears between two sqALPHAs, it may be treated as an +apostrophe and converted to its curly form. Otherwise, it is treated +like all other QUOTEs and follows the normal algorithm. + +1. If a QUOTE is immediately preceded by a curly quote of opposite +parity, it is converted to a curly quote in the same direction. + +2. If a QUOTE is immediately preceded by a curly quote of the same +parity, it is converted to a curly quote of opposite direction. + +3. If a QUOTE is immediately followed by a curly quote of opposite +parity, it is converted to a curly quote in the same direction. + +4. If a QUOTE is immediately followed by a curly quote of the same +parity, it is converted to a curly quote of opposite direction. + +[[The above cases are intended to handle normal nested quotes or cases +where quotes enclose empty strings. Different cultures use different +parities as start points for nested quotes, but the algorithm doesn't +care.]] + +5. If a QUOTE is immediately preceded by an sqOPENPUNCT, it is +converted to a curly quote in the open direction. + +6. If a QUOTE is immediately followed by a sqCLOSEPUNCT, it is +converted to a curly quote in the close direction. + +7. If a QUOTE is in isolation, it is not converted. 
It is in +isolation if it is immediately preceded and followed by either a sqBREAK +or sqWHITE. The things before and after it don't have to be of +the same type. + +8. If a QUOTE is immediately preceded by a sqBREAK or sqWHITE and +is immediately followed by anything other than a sqBREAK or sqWHITE, +it is converted to the opening form of curly quote. + +9. If a QUOTE is immediately followed by a sqBREAK, sqWHITE, or +sqFOLLOWPUNCT and is immediately preceded by anything other than sqBREAK +or sqWHITE, it is converted to the closing form of curly quote. + +10. Any other QUOTE is not converted. + +================================================================ + +The algorithm doesn't make a special case of using ASCII double quote +as an inches indicator (there are other uses, like lat/long minutes; +ditto for the ASCII quote) because it is tough to tell if some numbers +with an ASCII double quote after them are intended to be one of those +"other things" or is just the end of a very long quote. So, the +algorithm will be wrong sometimes in those cases. + +It is otherwise sort of conservative, preferring to not convert things +it doesn't feel confident about. The reason for that is that there is +a contemplated on-the-fly conversion to smart quotes, but there is no +contemplated on-the-fly conversion to ASCII QUOTEs. So, if the +algorithm makes a mistake by not converting, the user can correct it +by directly entering the appropriate smart quote character or by +heuristically tricking AbiWord into converting it for him/her and then +fixing things up. (That heuristic step shouldn't be necessary, you +know, but I think we all use software for which we have become +accustomed to such things.) + +What about the occasions when this algorithm (or any alternative +algorithm) makes a mistake and converts a QUOTE to the curly form when +it really isn't wanted, in a particular case, by the user? 
Although +the user can change it back, some contemplated implementation details +might run around behind the barn and re-convert it when the user isn't +looking. I think we need a mechanism for dealing with that, but I +want to save proposals for that to be separate from the basic +algorithm. +*/ + +// The following are descriptions of the thing before or after a +// character being considered for smart quote promotion. The thing +// is either a structural break in a document, or it is a literal +// character that is part of some class (in some cases the class is +// so small it has only one possible member). The classes should +// look familiar from the algorithm above. There is a special class +// used only for the coding of rules: sqDONTCARE in a rule means it +// doesn't matter what occurs in that position. +enum sqThingAt +{ + sqDONTCARE, + sqQUOTEls, sqQUOTErs, sqQUOTEld, sqQUOTErd, // the smart quotes, left/right single/double + sqBREAK, sqFOLLOWPUNCT, sqOPENPUNCT, sqCLOSEPUNCT, sqOTHERPUNCT, sqALPHA, sqWHITE +}; + +// TODO: This function probably needs tuning for non-Anglo locales. +static enum sqThingAt whatKindOfChar(UT_UCSChar thing) +{ + switch (thing) + { + case UCS_LQUOTE: return sqQUOTEls; + case UCS_RQUOTE: return sqQUOTErs; + case UCS_LDBLQUOTE: return sqQUOTEld; + case UCS_RDBLQUOTE: return sqQUOTErd; + + case '(': case '{': case '[': return sqOPENPUNCT; + case ')': case '}': case ']': return sqCLOSEPUNCT; + + case '.': case ',': case ';': case ':': case '!': case '?': return sqFOLLOWPUNCT; + + } + if (UT_UCS_isalpha(thing)) return sqALPHA; + if (UT_UCS_ispunct(thing)) return sqOTHERPUNCT; + if (UT_UCS_isspace(thing)) return sqWHITE; + + return sqDONTCARE; +} + +struct sqTable +{ + enum sqThingAt before; + UT_UCSChar thing; + enum sqThingAt after; + UT_UCSChar replacement; +}; +// The idea of the table is to drive the algorithm without lots of +// cluttery code. 
Something using this table pre-computes what the +// things are before and after the character in question, and then +// dances through this table looking for a match on all three. +// The final item in each row is the character to use to replace +// the candidate character. +// +// (Yeah, this table is big, but it is only used when a quote character +// shows up in typing or in a paste, and it goes pretty fast.) +// +// sqDONTCARE is like a wild card for the thing before or after, and +// UCS_UNKPUNK in the replacement position means don't do a replacement. +static struct sqTable sqTable_en[] = +{ + {sqALPHA, '\'', sqALPHA, UCS_RQUOTE}, // rule 0 + {sqALPHA, '`', sqALPHA, UCS_RQUOTE}, // rule 0 + + {sqQUOTEld, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 1 + {sqQUOTErd, '\'', sqDONTCARE, UCS_RQUOTE}, // rule 1 + + {sqQUOTEld, '`', sqDONTCARE, UCS_LQUOTE}, // rule 1 + {sqQUOTErd, '`', sqDONTCARE, UCS_RQUOTE}, // rule 1 + + {sqQUOTEls, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 1 + {sqQUOTErs, '"', sqDONTCARE, UCS_RDBLQUOTE}, // rule 1 + + {sqQUOTEls, '\'', sqDONTCARE, UCS_RQUOTE}, // rule 2 + {sqQUOTErs, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 2 + + {sqQUOTEls, '`', sqDONTCARE, UCS_RQUOTE}, // rule 2 + {sqQUOTErs, '`', sqDONTCARE, UCS_LQUOTE}, // rule 2 + + {sqQUOTEld, '"', sqDONTCARE, UCS_RDBLQUOTE}, // rule 2 + {sqQUOTErd, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 2 + + {sqDONTCARE, '\'', sqQUOTEld, UCS_LQUOTE}, // rule 3 + {sqDONTCARE, '\'', sqQUOTErd, UCS_RQUOTE}, // rule 3 + + {sqDONTCARE, '`', sqQUOTEld, UCS_LQUOTE}, // rule 3 + {sqDONTCARE, '`', sqQUOTErd, UCS_RQUOTE}, // rule 3 + + {sqDONTCARE, '"', sqQUOTEls, UCS_LDBLQUOTE}, // rule 3 + {sqDONTCARE, '"', sqQUOTErs, UCS_RDBLQUOTE}, // rule 3 + + {sqDONTCARE, '\'', sqQUOTEls, UCS_RQUOTE}, // rule 4 + {sqDONTCARE, '\'', sqQUOTErs, UCS_LQUOTE}, // rule 4 + + {sqDONTCARE, '`', sqQUOTEls, UCS_RQUOTE}, // rule 4 + {sqDONTCARE, '`', sqQUOTErs, UCS_LQUOTE}, // rule 4 + + {sqDONTCARE, '"', sqQUOTEld, UCS_RDBLQUOTE}, // rule 4 
+ {sqDONTCARE, '"', sqQUOTErd, UCS_LDBLQUOTE}, // rule 4 + + {sqOPENPUNCT, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 5 + {sqOPENPUNCT, '`', sqDONTCARE, UCS_LQUOTE}, // rule 5 + {sqOPENPUNCT, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 5 + + {sqDONTCARE, '\'', sqCLOSEPUNCT, UCS_RQUOTE}, // rule 6 + {sqDONTCARE, '`', sqCLOSEPUNCT, UCS_RQUOTE}, // rule 6 + {sqDONTCARE, '"', sqCLOSEPUNCT, UCS_RDBLQUOTE}, // rule 6 + + {sqBREAK, '\'', sqBREAK, UCS_UNKPUNK}, // rule 7 + {sqWHITE, '\'', sqBREAK, UCS_UNKPUNK}, // rule 7 + {sqBREAK, '\'', sqWHITE, UCS_UNKPUNK}, // rule 7 + {sqWHITE, '\'', sqWHITE, UCS_UNKPUNK}, // rule 7 + + {sqBREAK, '`', sqBREAK, UCS_UNKPUNK}, // rule 7 + {sqWHITE, '`', sqBREAK, UCS_UNKPUNK}, // rule 7 + {sqBREAK, '`', sqWHITE, UCS_UNKPUNK}, // rule 7 + {sqWHITE, '`', sqWHITE, UCS_UNKPUNK}, // rule 7 + + {sqBREAK, '"', sqBREAK, UCS_UNKPUNK}, // rule 7 + {sqWHITE, '"', sqBREAK, UCS_UNKPUNK}, // rule 7 + {sqBREAK, '"', sqWHITE, UCS_UNKPUNK}, // rule 7 + {sqWHITE, '"', sqWHITE, UCS_UNKPUNK}, // rule 7 + + {sqBREAK, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 8 + {sqWHITE, '\'', sqDONTCARE, UCS_LQUOTE}, // rule 8 + + {sqBREAK, '`', sqDONTCARE, UCS_LQUOTE}, // rule 8 + {sqWHITE, '`', sqDONTCARE, UCS_LQUOTE}, // rule 8 + + {sqBREAK, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 8 + {sqWHITE, '"', sqDONTCARE, UCS_LDBLQUOTE}, // rule 8 + + {sqDONTCARE, '\'', sqBREAK, UCS_RQUOTE}, // rule 9 + {sqDONTCARE, '\'', sqWHITE, UCS_RQUOTE}, // rule 9 + {sqDONTCARE, '\'', sqFOLLOWPUNCT, UCS_RQUOTE}, // rule 9 + + {sqDONTCARE, '`', sqBREAK, UCS_RQUOTE}, // rule 9 + {sqDONTCARE, '`', sqWHITE, UCS_RQUOTE}, // rule 9 + {sqDONTCARE, '`', sqFOLLOWPUNCT, UCS_RQUOTE}, // rule 9 + + {sqDONTCARE, '"', sqBREAK, UCS_RDBLQUOTE}, // rule 9 + {sqDONTCARE, '"', sqWHITE, UCS_RDBLQUOTE}, // rule 9 + {sqDONTCARE, '"', sqFOLLOWPUNCT, UCS_RDBLQUOTE}, // rule 9 + + // following rules are the same as falling off the end of the list... 
+ + //{sqDONTCARE, '\'', sqDONTCARE, UCS_UNKPUNK}, // rule 10 + //{sqDONTCARE, '`', sqDONTCARE, UCS_UNKPUNK}, // rule 10 + //{sqDONTCARE, '"', sqDONTCARE, UCS_UNKPUNK}, // rule 10 + + {sqDONTCARE, 0, sqDONTCARE, UCS_UNKPUNK} // signals end of table +}; + +void FL_DocLayout::considerSmartQuoteCandidateAt(fl_BlockLayout *block, UT_uint32 offset) +{ + if (!block) return; + setPendingSmartQuote(NULL, 0); // avoid recursion + UT_GrowBuf pgb(1024); + block->getBlockBuf(&pgb); + // this is for the benefit of the UT_DEBUGMSG and should be changed to + // something other than '?' if '?' ever shows up as UT_isSmartQuotableCharacter() + UT_UCSChar c = '?'; + if (pgb.getLength() > offset) c = *pgb.getPointer(offset); + xxx_UT_DEBUGMSG(("FL_DocLayout::considerSmartQuoteCandidateAt(%x, %d) |%c|\n", block, offset, c)); + + // there are some operations that leave a dangling pending + // smart quote, so just double check before plunging onward + if (UT_isSmartQuotableCharacter(c)) + { + enum sqThingAt before = sqBREAK, after = sqBREAK; + if (offset > 0) + { + // TODO: is there a need to see if this is on a run boundary? + // TODO: Within a block, are there runs that are significant + // TODO: breaks or whatever? + before = whatKindOfChar(*pgb.getPointer(offset - 1)); + } + else + { + // candidate was the first character in the block, so + // see what was at the end of the previous block, if any + fl_BlockLayout *ob = block->getPrev(); + if (ob) + { + fp_Run *last, *r = ob->getFirstRun(); + do + { + last = r; + } while ((r = r->getNext())); // assignment + if (last && (FPRUN_TEXT == last->getType())) + { + // last run of previous block was a text run, + // so find out what the final character was + UT_GrowBuf pgb_b(1024); + ob->getBlockBuf(&pgb_b); + if (pgb_b.getLength()) + { + before = whatKindOfChar(*pgb_b.getPointer(pgb.getLength()-1)); + } + } + } + } + + if (offset+1 < pgb.getLength()) + { + // TODO: is there a need to see if this is on a run boundary? 
+ // TODO: Within a block, are there runs that are significant + // TODO: breaks or whatever? + after = whatKindOfChar(*pgb.getPointer(offset + 1)); + } + else + { + // candidate was the last character in a block, so see + // what's at the beginning of the next block, if any + fl_BlockLayout *ob = block->getNext(); + if (ob) + { + fp_Run *r = ob->getFirstRun(); + if (r && (FPRUN_TEXT == r->getType())) + { + // first run of next block is a text run, so + // see what the first character was + UT_GrowBuf pgb_a(1024); + ob->getBlockBuf(&pgb_a); + if (pgb_a.getLength()) + { + after = whatKindOfChar(*pgb_a.getPointer(0)); + } + } + } + } + + // we now know what the before and after things are, so + // spin through the table. + UT_UCSChar replacement = UCS_UNKPUNK; // means don't replace + // TODO: select a table based on default locale or on the locale + // TODO: of the fragment of text we're working in (locale tagging + // TODO: of text doesn't exist in Abi as of this writing) + struct sqTable *table = sqTable_en; + for (unsigned int tdex=0; table[tdex].thing; ++tdex) + { + if (c != table[tdex].thing) continue; + if (table[tdex].before == sqDONTCARE || table[tdex].before == before) + { + if (table[tdex].after == sqDONTCARE || table[tdex].after == after) + { + replacement = table[tdex].replacement; + break; + } + } + } + if (replacement != UCS_UNKPUNK) + { + // your basic emacs (save-excursion...) :-) + PT_DocPosition saved_pos, quotable_at; + saved_pos = m_pView->getPoint(); + quotable_at = block->getPosition(UT_FALSE) + offset; + m_pView->moveInsPtTo(quotable_at); + // delete/insert create change records for UNDO + m_pView->cmdCharDelete(UT_TRUE, 1); + m_pView->cmdCharInsert(&replacement, 1); + m_pView->moveInsPtTo(saved_pos); + // Alas, Abi undo moves the insertion point, so you can't + // just UNDO right after a smart quote pops up to force + // an ASCII quote. For an open quote, you could type + // " backspace to get it (in other words, quote, space, + // backspace. 
The space will prevent the smart quote + // promotion (no magic ... just following the rules). + // For a close quote, type "/backspace (quote, slash, backspace) + // for similar reasons. + } + } } diff -ru abi-082100-ORIG/src/text/fmt/xp/fl_DocLayout.h abi-082100/src/text/fmt/xp/fl_DocLayout.h --- abi-082100-ORIG/src/text/fmt/xp/fl_DocLayout.h Mon Jul 31 22:38:30 2000 +++ abi-082100/src/text/fmt/xp/fl_DocLayout.h Mon Aug 21 23:37:29 2000 @@ -84,9 +84,22 @@ inline FV_View * getView(void) const { return m_pView; } inline GR_Graphics* getGraphics(void) const { return m_pG; } inline PD_Document* getDocument(void) const { return m_pDoc; } - inline fl_BlockLayout* getPendingBlock(void) const { return m_pPendingBlock; }; - inline fl_PartOfBlock* getPendingWord(void) const { return m_pPendingWord; }; + inline fl_BlockLayout* getPendingBlockForSpell(void) const { return m_pPendingBlockForSpell; }; + inline fl_PartOfBlock* getPendingWordForSpell(void) const { return m_pPendingWordForSpell; }; + // The smart quote stuff works by listening for insertions (typing and paste) and motion. + // It needs one character of type-ahead before working the algorithm, so a single + // quote character going by is remembered as "pending". After the type-ahead (or + // motion) occurs, the pending quote is considered for promotion. For an insertion + // of multiple characters (which probably just means a paste), all smart quote consideration + // can be done immediately except for a quote occurring in the very last character + // of the stuff being inserted. 
+ inline fl_BlockLayout* getPendingBlockForSmartQuote(void) const { return m_pPendingBlockForSmartQuote; }; + inline UT_uint32 getOffsetForSmartQuote(void) const { return m_uOffsetForSmartQuote; }; + void setPendingSmartQuote(fl_BlockLayout *block, UT_uint32 offset); + void considerSmartQuoteCandidateAt(fl_BlockLayout *block, UT_uint32 offset); + inline void considerPendingSmartQuoteCandidate() {considerSmartQuoteCandidateAt(m_pPendingBlockForSmartQuote, m_uOffsetForSmartQuote); } + UT_sint32 getHeight(); UT_sint32 getWidth(); @@ -119,12 +132,12 @@ void formatAll(); void updateLayout(); - UT_Bool isPendingWord(void) const; - UT_Bool touchesPendingWord(fl_BlockLayout *pBlock, + UT_Bool isPendingWordForSpell(void) const; + UT_Bool touchesPendingWordForSpell(fl_BlockLayout *pBlock, UT_uint32 iOffset, UT_sint32 chg) const; - void setPendingWord(fl_BlockLayout *pBlock, fl_PartOfBlock* pWord); - UT_Bool checkPendingWord(void); + void setPendingWordForSpell(fl_BlockLayout *pBlock, fl_PartOfBlock* pWord); + UT_Bool checkPendingWordForSpell(void); void queueBlockForBackgroundCheck(UT_uint32 reason, fl_BlockLayout *pBlock, UT_Bool bHead=UT_FALSE); void dequeueBlockForBackgroundCheck(fl_BlockLayout *pBlock); @@ -159,7 +172,7 @@ { bgcrDebugFlash = (1 << 0), bgcrSpelling = (1 << 1), - bgcrSmartQuotes = (1 << 2) + bgcrSmartQuotes = (1 << 2) // ha! 
we're not using background checks for this after all }; #ifdef FMT_TEST @@ -171,6 +184,7 @@ protected: static void _backgroundCheck(UT_Timer * pTimer); void _toggleAutoSpell(UT_Bool bSpell); + void _toggleAutoSmartQuotes(UT_Bool bSQ); static void _prefsListener(class XAP_App *, class XAP_Prefs *, class UT_AlphaHashTable *, void *); @@ -193,11 +207,15 @@ // spell check stuff UT_Vector m_vecUncheckedBlocks; - fl_BlockLayout* m_pPendingBlock; // if NULL, then ignore m_pPendingWord - fl_PartOfBlock* m_pPendingWord; + fl_BlockLayout* m_pPendingBlockForSpell; // if NULL, then ignore m_pPendingWordForSpell + fl_PartOfBlock* m_pPendingWordForSpell; UT_Bool m_bSpellCheckCaps; UT_Bool m_bSpellCheckNumbers; UT_Bool m_bSpellCheckInternet; + + // smart quote latent instance + fl_BlockLayout* m_pPendingBlockForSmartQuote; // if NULL, ignore m_uOffsetForSmartQuote + UT_uint32 m_uOffsetForSmartQuote; UT_Timer* m_pBackgroundCheckTimer; UT_uint32 m_uBackgroundCheckReasons; // bit flags diff -ru abi-082100-ORIG/src/text/fmt/xp/fv_View.cpp abi-082100/src/text/fmt/xp/fv_View.cpp --- abi-082100-ORIG/src/text/fmt/xp/fv_View.cpp Sun Aug 20 21:06:38 2000 +++ abi-082100/src/text/fmt/xp/fv_View.cpp Mon Aug 21 23:41:54 2000 @@ -1348,10 +1348,11 @@ _drawInsertionPoint(); } + m_pLayout->considerPendingSmartQuoteCandidate(); // Signal Spell checks are safe again - m_bdontSpellCheckRightNow = UT_FALSE; - _checkPendingWord(); + m_bdontSpellCheckRightNow = UT_FALSE; + _checkPendingWordForSpell(); } @@ -5169,27 +5170,28 @@ m_iInsPoint = pt; m_bPointEOL = bEOL; - _checkPendingWord(); + m_pLayout->considerPendingSmartQuoteCandidate(); + _checkPendingWordForSpell(); } -void FV_View::_checkPendingWord(void) +void FV_View::_checkPendingWordForSpell(void) { - if(m_bdontSpellCheckRightNow == UT_TRUE) + if(m_bdontSpellCheckRightNow == UT_TRUE) { - return; + return; } // deal with pending word, if any - if (m_pLayout->isPendingWord()) + if (m_pLayout->isPendingWordForSpell()) { fl_BlockLayout* pBL = 
_findBlockAtPosition(m_iInsPoint); if (pBL) { UT_uint32 iOffset = m_iInsPoint - pBL->getPosition(); - if (!m_pLayout->touchesPendingWord(pBL, iOffset, 0)) + if (!m_pLayout->touchesPendingWordForSpell(pBL, iOffset, 0)) { // no longer there, so check it - if (m_pLayout->checkPendingWord()) + if (m_pLayout->checkPendingWordForSpell()) updateScreen(); } } @@ -5233,7 +5235,8 @@ if (m_iInsPoint != posOld) { - _checkPendingWord(); + m_pLayout->considerPendingSmartQuoteCandidate(); + _checkPendingWordForSpell(); _clearIfAtFmtMark(posOld); notifyListeners(AV_CHG_MOTION); } @@ -5250,7 +5253,8 @@ if (m_iInsPoint != posOld) { - _checkPendingWord(); + m_pLayout->considerPendingSmartQuoteCandidate(); + _checkPendingWordForSpell(); _clearIfAtFmtMark(posOld); notifyListeners(AV_CHG_MOTION); } @@ -5260,7 +5264,8 @@ if (m_iInsPoint != posOld) { - _checkPendingWord(); + m_pLayout->considerPendingSmartQuoteCandidate(); + _checkPendingWordForSpell(); _clearIfAtFmtMark(posOld); notifyListeners(AV_CHG_MOTION); } diff -ru abi-082100-ORIG/src/text/fmt/xp/fv_View.h abi-082100/src/text/fmt/xp/fv_View.h --- abi-082100-ORIG/src/text/fmt/xp/fv_View.h Wed Aug 16 19:56:45 2000 +++ abi-082100/src/text/fmt/xp/fv_View.h Mon Aug 21 23:37:29 2000 @@ -335,10 +335,10 @@ void _doPaste(UT_Bool bUseClipboard); void _clearIfAtFmtMark(PT_DocPosition dpos); - void _checkPendingWord(void); + void _checkPendingWordForSpell(void); - UT_Bool _insertHeaderFooter(const XML_Char ** props, UT_Bool ftr); + UT_Bool _insertHeaderFooter(const XML_Char ** props, UT_Bool ftr); PT_DocPosition m_iInsPoint;



This archive was generated by hypermail 2b25 : Tue Aug 22 2000 - 11:54:26 CDT