please commit - fix for bug 223


Subject: please commit - fix for bug 223
From: Jesper Skov (jskov@redhat.com)
Date: Sat Feb 03 2001 - 15:39:05 CST


This fixes bug 223 (and possibly 436 - I haven't tested that yet). The
problem was that fields (and inline objects in general) were encoded as
spaces in the buffer used for finding word limits. I replaced the spaces
with the letter A (via a new macro, UCS_OBJECT) and added references to
the bug plus some explanations.

Jesper

Index: af/util/xp/ut_types.h
===================================================================
RCS file: /cvsroot/abi/src/af/util/xp/ut_types.h,v
retrieving revision 1.40
diff -u -5 -p -r1.40 ut_types.h
--- af/util/xp/ut_types.h 2001/01/18 22:27:43 1.40
+++ af/util/xp/ut_types.h 2001/02/03 21:35:46
@@ -43,29 +43,29 @@
 #ifdef HAVE_LIBXML2
 #include <libxml/tree.h>
 #define XML_Char xmlChar
 #endif
 
-typedef unsigned char UT_Byte;
-typedef unsigned short UT_UCSChar; /* Unicode */
+typedef unsigned char UT_Byte;
+typedef unsigned short UT_UCSChar; /* Unicode */
 
-typedef unsigned short UT_uint16;
-typedef unsigned int UT_uint32;
-typedef signed int UT_sint32;
+typedef unsigned short UT_uint16;
+typedef unsigned int UT_uint32;
+typedef signed int UT_sint32;
 
 
 /*
         TODO we currently use plain old C 'int' all over the place.
         For many applications, this is inappropriate, and we should change
         them to UT_sint32. Also, there are places where we are
         using it as a bool, and there are places where we are using it as
         an error code.
 */
 
-typedef unsigned char UT_Bool;
-#define UT_TRUE ((UT_Bool) 1)
-#define UT_FALSE ((UT_Bool) 0)
+typedef unsigned char UT_Bool;
+#define UT_TRUE ((UT_Bool) 1)
+#define UT_FALSE ((UT_Bool) 0)
 
 /*
         UT_Error should be used far more than it is. Any function
         which reasonably could fail at runtime for anything other than
         a coding error or bug should return an error code. Error codes
@@ -74,31 +74,31 @@ typedef unsigned char UT_Bool;
         Addendum: 1-23-99
         If you have any problems with or suggestions for error codes,
         please send them to Sam Tobin-Hochstadt (sytobinh@uchicago.edu).
         I am the person that has worked the most with them.
 */
-typedef UT_sint32 UT_Error;
-#define UT_OK ((UT_Error) 0)
-#define UT_ERROR ((UT_Error) -1) /* VERY generic */
-#define UT_OUTOFMEM ((UT_Error) -100)
-#define UT_SAVE_WRITEERROR ((UT_Error) -201)
-#define UT_SAVE_NAMEERROR ((UT_Error) -202)
-#define UT_SAVE_EXPORTERROR ((UT_Error) -203)
-#define UT_EXTENSIONERROR ((UT_Error) -204)
-#define UT_SAVE_OTHERERROR ((UT_Error) -200) /* This should eventually dissapear. */
-#define UT_IE_FILENOTFOUND ((UT_Error) -301)
-#define UT_IE_NOMEMORY ((UT_Error) -302)
-#define UT_IE_UNKNOWNTYPE ((UT_Error) -303)
-#define UT_IE_BOGUSDOCUMENT ((UT_Error) -304)
-#define UT_IE_COULDNOTOPEN ((UT_Error) -305)
-#define UT_IE_COULDNOTWRITE ((UT_Error) -306)
-#define UT_IE_FAKETYPE ((UT_Error) -307)
-#define UT_INVALIDFILENAME ((UT_Error) -308)
-#define UT_NOPIECETABLE ((UT_Error) -309)
-#define UT_IE_ADDLISTENERERROR ((UT_Error) -310)
-#define UT_IE_UNSUPTYPE ((UT_Error) -311)
-#define UT_IE_IMPORTERROR ((UT_Error) -300) /* The general case */
+typedef UT_sint32 UT_Error;
+#define UT_OK ((UT_Error) 0)
+#define UT_ERROR ((UT_Error) -1) /* VERY generic */
+#define UT_OUTOFMEM ((UT_Error) -100)
+#define UT_SAVE_WRITEERROR ((UT_Error) -201)
+#define UT_SAVE_NAMEERROR ((UT_Error) -202)
+#define UT_SAVE_EXPORTERROR ((UT_Error) -203)
+#define UT_EXTENSIONERROR ((UT_Error) -204)
+#define UT_SAVE_OTHERERROR ((UT_Error) -200) /* This should eventually dissapear. */
+#define UT_IE_FILENOTFOUND ((UT_Error) -301)
+#define UT_IE_NOMEMORY ((UT_Error) -302)
+#define UT_IE_UNKNOWNTYPE ((UT_Error) -303)
+#define UT_IE_BOGUSDOCUMENT ((UT_Error) -304)
+#define UT_IE_COULDNOTOPEN ((UT_Error) -305)
+#define UT_IE_COULDNOTWRITE ((UT_Error) -306)
+#define UT_IE_FAKETYPE ((UT_Error) -307)
+#define UT_INVALIDFILENAME ((UT_Error) -308)
+#define UT_NOPIECETABLE ((UT_Error) -309)
+#define UT_IE_ADDLISTENERERROR ((UT_Error) -310)
+#define UT_IE_UNSUPTYPE ((UT_Error) -311)
+#define UT_IE_IMPORTERROR ((UT_Error) -300) /* The general case */
 
 
 /*
         The MSVC debug runtime library can track leaks back to the
         original allocation via the following black magic.
@@ -113,41 +113,48 @@ typedef UT_sint32 UT_Error;
 /* Unicode character constants. Try to use these rather than
 ** decimal or hex constants throughout the code. See also bug
 ** 512.
 */
 
-#define UCS_TAB ((UT_UCSChar)0x0009)
-#define UCS_LF ((UT_UCSChar)0x000a)
-#define UCS_VTAB ((UT_UCSChar)0x000b)
-#define UCS_FF ((UT_UCSChar)0x000c)
-#define UCS_CR ((UT_UCSChar)0x000d)
-#define UCS_SPACE ((UT_UCSChar)0x0020)
-#define UCS_NBSP ((UT_UCSChar)0x00a0)
-#define UCS_FIELDSTART ((UT_UCSChar)0xFFFE)
-#define UCS_FIELDEND ((UT_UCSChar)0xFFFD)
+/* When objects (fields, etc) must be represented in unicode, use the
+ letter A. Alternatively use some other (better suited) unicode but
+ change UT_isWordDelimiter to not consider it a word delimiter.
+ See bug 223.
+*/
+#define UCS_OBJECT ((UT_UCSChar)0x0041)
+
+#define UCS_TAB ((UT_UCSChar)0x0009)
+#define UCS_LF ((UT_UCSChar)0x000a)
+#define UCS_VTAB ((UT_UCSChar)0x000b)
+#define UCS_FF ((UT_UCSChar)0x000c)
+#define UCS_CR ((UT_UCSChar)0x000d)
+#define UCS_SPACE ((UT_UCSChar)0x0020)
+#define UCS_NBSP ((UT_UCSChar)0x00a0)
+#define UCS_FIELDSTART ((UT_UCSChar)0xFFFE)
+#define UCS_FIELDEND ((UT_UCSChar)0xFFFD)
 
 #if 1 /* try to use the unicode values for special chars */
 #define UCS_EN_SPACE ((UT_UCSChar)0x2002)
 #define UCS_EM_SPACE ((UT_UCSChar)0x2003)
-#define UCS_EN_DASH ((UT_UCSChar)0x2013)
-#define UCS_EM_DASH ((UT_UCSChar)0x2014)
-#define UCS_BULLET ((UT_UCSChar)0x2022)
-#define UCS_LQUOTE ((UT_UCSChar)0x2018)
-#define UCS_RQUOTE ((UT_UCSChar)0x2019)
+#define UCS_EN_DASH ((UT_UCSChar)0x2013)
+#define UCS_EM_DASH ((UT_UCSChar)0x2014)
+#define UCS_BULLET ((UT_UCSChar)0x2022)
+#define UCS_LQUOTE ((UT_UCSChar)0x2018)
+#define UCS_RQUOTE ((UT_UCSChar)0x2019)
 #define UCS_LDBLQUOTE ((UT_UCSChar)0x201c)
 #define UCS_RDBLQUOTE ((UT_UCSChar)0x201d)
 #define UCS_UNKPUNK ((UT_UCSChar)0xFFFF) /* "unknown punctuation" used with UT_isWordDelimiter() */
 
 #else /* see bug 512 */
 
 #define UCS_EN_SPACE ((UT_UCSChar)0x0020)
 #define UCS_EM_SPACE ((UT_UCSChar)0x0020)
-#define UCS_EN_DASH ((UT_UCSChar)0x002d)
-#define UCS_EM_DASH ((UT_UCSChar)0x002d)
-#define UCS_BULLET ((UT_UCSChar)0x0095)
-#define UCS_LQUOTE ((UT_UCSChar)0x0027)
-#define UCS_RQUOTE ((UT_UCSChar)0x0027)
+#define UCS_EN_DASH ((UT_UCSChar)0x002d)
+#define UCS_EM_DASH ((UT_UCSChar)0x002d)
+#define UCS_BULLET ((UT_UCSChar)0x0095)
+#define UCS_LQUOTE ((UT_UCSChar)0x0027)
+#define UCS_RQUOTE ((UT_UCSChar)0x0027)
 #define UCS_LDBLQUOTE ((UT_UCSChar)0x0022)
 #define UCS_RDBLQUOTE ((UT_UCSChar)0x0022)
 #define UCS_UNKPUNK ((UT_UCSChar)0x00FF)
 
 #endif
@@ -156,20 +163,20 @@ typedef UT_sint32 UT_Error;
 ** Some useful macros that we use throughout
 */
 
 #define FREEP(p) do { if (p) free((void *)p); (p)=NULL; } while (0)
 #define DELETEP(p) do { if (p) delete(p); (p)=NULL; } while (0)
-#define REPLACEP(p,q) do { if (p) delete p; p = q; } while (0)
+#define REPLACEP(p,q) do { if (p) delete p; p = q; } while (0)
 #define REFP(p) ((p)->ref(), (p))
 #define UNREFP(p) do { if (p) (p)->unref(); (p)=NULL; } while (0)
 #define CLONEP(p,q) do { FREEP(p); if (q && *q) UT_cloneString(p,q); } while (0)
 
-#define NrElements(a) ((sizeof(a)/sizeof(a[0])))
+#define NrElements(a) ((sizeof(a)/sizeof(a[0])))
 #define MyMax(a,b) (((a)>(b)) ? (a) : (b))
 #define MyMin(a,b) (((a)<(b)) ? (a) : (b))
 
-#define UT_UNUSED(v) do { (v)=(v); } while (0)
+#define UT_UNUSED(v) do { (v)=(v); } while (0)
 
 #define E2B(err) ((err) == UT_OK)
 
 
 /* UGLY UGLY Iconv hack for operating systems with strange declartions
Index: text/ptbl/xp/pt_PieceTable.cpp
===================================================================
RCS file: /cvsroot/abi/src/text/ptbl/xp/pt_PieceTable.cpp,v
retrieving revision 1.64
diff -u -5 -p -r1.64 pt_PieceTable.cpp
--- text/ptbl/xp/pt_PieceTable.cpp 2001/01/30 00:04:57 1.64
+++ text/ptbl/xp/pt_PieceTable.cpp 2001/02/03 21:35:47
@@ -283,97 +283,110 @@ UT_Bool pt_PieceTable::getSpanPtr(PL_Str
 PD_Document * pt_PieceTable::getDocument(void)
 {
         return m_pDocument;
 }
 
-UT_Bool pt_PieceTable::getBlockBuf(PL_StruxDocHandle sdh, UT_GrowBuf * pgb) const
+/*!
+ Copy paragraph (block) into buffer
+ \param sdh Paragraph to copy
+ \retval pgb Buffer where text should be copied to
+ \return Always returns true
+
+ Copy the contents (unicode character data) of the paragraph (block)
+ into the growbuf given. We append the content onto the growbuf.
+*/
+UT_Bool pt_PieceTable::getBlockBuf(PL_StruxDocHandle sdh,
+ UT_GrowBuf * pgb) const
 {
- // copy the contents (unicode character data) of the
- // paragraph (block) into the growbuf given. we append
- // the content onto the growbuf.
-
- UT_ASSERT(pgb);
+ UT_ASSERT(pgb);
         
- pf_Frag * pf = (pf_Frag *)sdh;
- UT_ASSERT(pf->getType() == pf_Frag::PFT_Strux);
- pf_Frag_Strux * pfsBlock = static_cast<pf_Frag_Strux *> (pf);
- UT_ASSERT(pfsBlock->getStruxType() == PTX_Block);
+ pf_Frag * pf = (pf_Frag *)sdh;
+ UT_ASSERT(pf->getType() == pf_Frag::PFT_Strux);
+ pf_Frag_Strux * pfsBlock = static_cast<pf_Frag_Strux *> (pf);
+ UT_ASSERT(pfsBlock->getStruxType() == PTX_Block);
 
- UT_uint32 bufferOffset = pgb->getLength();
+ UT_uint32 bufferOffset = pgb->getLength();
         
- pf_Frag * pfTemp = pfsBlock->getNext();
- while (pfTemp)
- {
- switch (pfTemp->getType())
- {
- default:
- UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
- case pf_Frag::PFT_Strux:
- case pf_Frag::PFT_EndOfDoc:
- pfTemp = NULL;
- break;
-
- case pf_Frag::PFT_FmtMark:
- pfTemp = pfTemp->getNext();
- break;
-
- case pf_Frag::PFT_Text:
- {
- pf_Frag_Text * pft = static_cast<pf_Frag_Text *>(pfTemp);
- const UT_UCSChar * pSpan = getPointer(pft->getBufIndex());
- UT_uint32 length = pft->getLength();
-
- UT_Bool bAppended;
- bAppended = pgb->ins(bufferOffset,pSpan,length);
- UT_ASSERT(bAppended);
-
- bufferOffset += length;
- }
- pfTemp = pfTemp->getNext();
- break;
-
- case pf_Frag::PFT_Object:
- {
- /*
- TODO investigate this....
- Now *here* is a seriously questionable fragment
- of code. :-) We can't let getBlockBuf halt on
- a block when it finds an inline object. However,
- we can't very well sensibly store an inline object
- in a UNICODE character. So, we dump spaces in
- its place, to preserve the integrity of the
- buffer. Obviously, those spaces aren't useful,
- but at least the app doesn't crash, and the rest
- of the text in the block is safely stored in the
- buffer in the proper location.
- */
-
- UT_uint32 length = pfTemp->getLength();
-
- // TODO investigate appending the SPACES directly to
- // TODO the pgb. **or** investigate the cost of this
- // TODO malloc and what happens when it fails....
+ pf_Frag * pfTemp = pfsBlock->getNext();
+ while (pfTemp)
+ {
+ switch (pfTemp->getType())
+ {
+ default:
+ UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
+ case pf_Frag::PFT_Strux:
+ case pf_Frag::PFT_EndOfDoc:
+ pfTemp = NULL;
+ break;
+
+ case pf_Frag::PFT_FmtMark:
+ pfTemp = pfTemp->getNext();
+ break;
+
+ case pf_Frag::PFT_Text:
+ {
+ pf_Frag_Text * pft = static_cast<pf_Frag_Text *>(pfTemp);
+ const UT_UCSChar * pSpan = getPointer(pft->getBufIndex());
+ UT_uint32 length = pft->getLength();
+
+ UT_Bool bAppended;
+ bAppended = pgb->ins(bufferOffset,pSpan,length);
+ UT_ASSERT(bAppended);
+
+ bufferOffset += length;
+ }
+ pfTemp = pfTemp->getNext();
+ break;
+
+ case pf_Frag::PFT_Object:
+ {
+ /*
+ TODO investigate this.... Now *here* is a seriously
+ questionable fragment of code. :-) We can't let
+ getBlockBuf halt on a block when it finds an inline
+ object. However, we can't very well sensibly store an
+ inline object in a UNICODE character. So, we dump
+ UCS_OBJECT in its place, to preserve the integrity of the
+ buffer. Obviously, those codes aren't useful, but at
+ least the app doesn't crash, and the rest of the text in
+ the block is safely stored in the buffer in the proper
+ location.
+
+ The UCS_OBJECT used to be defined as a space, but that
+ caused selection code to fail for fields since the code
+ would look for the beginning of a word, ignoring
+ spaces. Now the UCS_OBJECT is instead defined as an
+ alpha character. Doesn't really matter since it'll never
+ be used for anything but limit checking anyway. See bug
+ #223 for details.
+ */
+
+ UT_uint32 length = pfTemp->getLength();
+
+ // TODO investigate appending the SPACES directly to
+ // TODO the pgb. **or** investigate the cost of this
+ // TODO malloc and what happens when it fails....
                                 
- UT_UCSChar* pSpaces = new UT_UCSChar[length];
- for (UT_uint32 i=0; i<length; i++)
- {
- pSpaces[i] = UCS_SPACE;
- }
- UT_Bool bAppended;
- bAppended = pgb->ins(bufferOffset, pSpaces, length);
- delete pSpaces;
- UT_ASSERT(bAppended);
+ UT_UCSChar* pSpaces = new UT_UCSChar[length];
+ for (UT_uint32 i=0; i<length; i++)
+ {
+ pSpaces[i] = UCS_OBJECT;
+ }
+ UT_Bool bAppended;
+ bAppended = pgb->ins(bufferOffset, pSpaces, length);
+ delete pSpaces;
+ UT_ASSERT(bAppended);
                 
- bufferOffset += length;
- }
- pfTemp = pfTemp->getNext();
- break;
- }
- }
+ bufferOffset += length;
+ }
+ pfTemp = pfTemp->getNext();
+ break;
+ }
+ }
 
- UT_ASSERT(bufferOffset == pgb->getLength());
- return UT_TRUE;
+ UT_ASSERT(bufferOffset == pgb->getLength());
+ return UT_TRUE;
 }
 
 UT_Bool pt_PieceTable::getBounds(UT_Bool bEnd, PT_DocPosition & docPos) const
 {
         // be optimistic



This archive was generated by hypermail 2b25 : Sat Feb 03 2001 - 15:39:09 CST