abiword-dev Mailing List Archive: Re: volodymyr - r29899

From: Volodymyr Rudyj <vladimir.rudoy_at_gmail.com>
Date: Sat Jul 02 2011 - 18:18:21 CEST

Oh, it was accidentally added. I'll fix it now.

On Sat, Jul 2, 2011 at 7:02 PM, Dominic Lachowicz
<domlachowicz@gmail.com> wrote:
>
> You need X11 in an importer?
>
> On Sat, Jul 2, 2011 at 10:35 AM, <cvs@abisource.com> wrote:
> >
> > Author: volodymyr
> > Date: 2011-07-02 16:35:43 +0200 (Sat, 02 Jul 2011)
> > New Revision: 29899
> >
> > Modified:
> > abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp
> > abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h
> > abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp
> > abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h
> > Log:
> > EPUB import plugin now can import EPUB files. Containers that have several OPS XHTML files are also supported.
> >
> >
> > Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp
> > ===================================================================
> > --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp 2011-07-02 13:26:48 UTC (rev 29898)
> > +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.cpp 2011-07-02 14:35:43 UTC (rev 29899)
> > @@ -18,6 +18,10 @@
> > * 02111-1307, USA.
> > */
> >
> > +#include <X11/X.h>
> > +#include <stdexcept>
> > +#include <zlib.h>
> > +
> > #include "ie_imp_EPUB.h"
> >
> > IE_Imp_EPUB::IE_Imp_EPUB(PD_Document* pDocument) : IE_Imp(pDocument)
> > @@ -37,6 +41,353 @@
> >
> > UT_Error IE_Imp_EPUB::_loadFile(GsfInput* input)
> > {
> > + m_epub = gsf_infile_zip_new(input, NULL);
> >
> > + if (m_epub == NULL)
> > + {
> > + UT_DEBUGMSG(("Can`t create gsf input zip object\n"));
> > + return UT_ERROR;
> > + }
> > +
> > +
> > + UT_DEBUGMSG(("Reading metadata\n"));
> > + if (readMetadata() != UT_OK)
> > + {
> > + UT_DEBUGMSG(("Failed to read metadata\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + UT_DEBUGMSG(("Reading package information\n"));
> > + if (readPackage() != UT_OK)
> > + {
> > + UT_DEBUGMSG(("Failed to read package information\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + UT_DEBUGMSG(("Uncompressing OPS data\n"));
> > + if (uncompress() != UT_OK)
> > + {
> > + UT_DEBUGMSG(("Failed to uncompress data\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + UT_DEBUGMSG(("Reading OPS data\n"));
> > + if (readStructure() != UT_OK)
> > + {
> > + UT_DEBUGMSG(("Failed to read OPS data\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + return UT_OK;
> > +
> > }
> >
> > +UT_Error IE_Imp_EPUB::readMetadata()
> > +{
> > + GsfInput* metaInf = gsf_infile_child_by_name(m_epub, "META-INF");
> > +
> > + if (metaInf == NULL)
> > + {
> > + UT_DEBUGMSG(("Can`t open container META-INF dir\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + GsfInput* meta = gsf_infile_child_by_name(GSF_INFILE(metaInf), "container.xml");
> > +
> > + if (meta == NULL)
> > + {
> > + UT_DEBUGMSG(("Can`t open container metadata\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + size_t metaSize = gsf_input_size(meta);
> > +
> > + if (metaSize == 0)
> > + {
> > + UT_DEBUGMSG(("Container metadata file is empty\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + gchar* metaXml = (gchar*)gsf_input_read(meta, metaSize, NULL);
> > +
> > +
> > + UT_UTF8String rootfilePath;
> > + UT_XML metaParser;
> > + ContainerListener containerListener;
> > + metaParser.setListener(&containerListener);
> > +
> > + if (metaParser.sniff(metaXml, metaSize, "container"))
> > + {
> > + UT_DEBUGMSG(("Parsing container.xml file\n"));
> > + metaParser.parse(metaXml, metaSize);
> > + } else
> > + {
> > + UT_DEBUGMSG(("Incorrect container.xml file\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + m_rootfilePath = containerListener.getRootFilePath();
> > +
> > + g_object_unref(G_OBJECT(meta));
> > + g_object_unref(G_OBJECT(metaInf));
> > +
> > + return UT_OK;
> > +}
> > +
> > +UT_Error IE_Imp_EPUB::readPackage()
> > +{
> > + gchar **aname = g_strsplit(m_rootfilePath.utf8_str(), G_DIR_SEPARATOR_S, 0);
> > + GsfInput* opf = gsf_infile_child_by_aname(m_epub, (const char**)aname);
> > +
> > + UT_DEBUGMSG(("Getting parent\n"));
> > + GsfInfile* opfParent = gsf_input_container(opf);
> > + m_opsDir = UT_UTF8String(gsf_input_name(GSF_INPUT(opfParent)));
> > +
> > + UT_DEBUGMSG(("OPS dir: %s\n", m_opsDir.utf8_str()));
> > +
> > + if (opf == NULL){
> > + UT_DEBUGMSG(("Can`t open .opf file\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + size_t opfSize = gsf_input_size(opf);
> > + gchar* opfXml = (gchar*)gsf_input_read(opf, opfSize, NULL);
> > +
> > + UT_XML opfParser;
> > + OpfListener opfListener;
> > + opfParser.setListener(&opfListener);
> > + if (opfParser.sniff(opfXml, opfSize, "package"))
> > + {
> > + UT_DEBUGMSG(("Parsing opf file\n"));
> > + opfParser.parse(opfXml, opfSize);
> > + } else
> > + {
> > + UT_DEBUGMSG(("Incorrect opf file found \n"));
> > + return UT_ERROR;
> > + }
> > +
> > + g_strfreev(aname);
> > + g_object_unref(G_OBJECT(opf));
> > + //g_object_unref(G_OBJECT(opfParent));
> > +
> > + m_spine = opfListener.getSpine();
> > + m_manifestItems = opfListener.getManifestItems();
> > +
> > + return UT_OK;
> > +}
> > +
> > +UT_Error IE_Imp_EPUB::uncompress()
> > +{
> > + m_tmpDir = UT_go_filename_to_uri(g_get_tmp_dir());
> > + m_tmpDir += G_DIR_SEPARATOR_S;
> > + m_tmpDir += getDoc()->getDocUUIDString();
> > +
> > + if (!UT_go_directory_create(m_tmpDir.utf8_str(), 0644, NULL))
> > + {
> > + UT_DEBUGMSG(("Can`t create temporary directory\n"));
> > + return UT_ERROR;
> > + }
> > + GsfInput *opsDirInput = gsf_infile_child_by_name(m_epub, m_opsDir.utf8_str());
> > + UT_DEBUGMSG(("Child count : %d", gsf_infile_num_children(m_epub)));
> > + if (opsDirInput == NULL)
> > + {
> > + UT_DEBUGMSG(("Failed to open OPS dir\n"));
> > + return UT_ERROR;
> > + }
> > +
> > + for(std::map<UT_UTF8String, UT_UTF8String>::iterator i = m_manifestItems.begin(); i != m_manifestItems.end(); i++)
> > + {
> > + gchar *itemFileName = UT_go_filename_from_uri((m_tmpDir + G_DIR_SEPARATOR_S + (*i).second).utf8_str());
> > + gchar** aname = g_strsplit((*i).second.utf8_str(), G_DIR_SEPARATOR_S, 0);
> > +
> > +
> > + GsfInput* itemInput = gsf_infile_child_by_aname(GSF_INFILE(opsDirInput), (const char**)aname);
> > + GsfOutput* itemOutput = createFileByPath(itemFileName);
> > + gsf_input_seek(itemInput, 0, G_SEEK_SET);
> > + gsf_input_copy(itemInput, itemOutput);
> > + g_strfreev(aname);
> > + g_free(itemFileName);
> > + g_object_unref(G_OBJECT(itemInput));
> > + gsf_output_close(itemOutput);
> > + }
> > +
> > + g_object_unref(G_OBJECT(opsDirInput));
> > +
> > +
> > + return UT_OK;
> > +}
> > +
> > +UT_Error IE_Imp_EPUB::readStructure()
> > +{
> > + getDoc()->createRawDocument();
> > + getDoc()->finishRawCreation();
> > +
> > + for(std::vector<UT_UTF8String>::iterator i = m_spine.begin(); i != m_spine.end(); i++)
> > + {
> > + try
> > + {
> > +
> > + UT_UTF8String itemPath = m_tmpDir + G_DIR_SEPARATOR_S + m_manifestItems.at(*i);
> > + PT_DocPosition posEnd = 0;
> > + getDoc()->getBounds(true, posEnd);
> > +
> > + GsfInput* itemInput = UT_go_file_open(itemPath.utf8_str(), NULL);
> > + size_t inputSize = gsf_input_size(itemInput);
> > + gchar* inputData = (gchar*)gsf_input_read(itemInput, inputSize, NULL);
> > +
> > + PD_Document *currentDoc = new PD_Document();
> > + currentDoc->createRawDocument();
> > + const char *suffix = strchr(itemPath.utf8_str(), '.');
> > + currentDoc->importFile(itemPath.utf8_str(),
> > + IE_Imp::fileTypeForSuffix(suffix), true, false, NULL);
> > + currentDoc->finishRawCreation();
> > +
> > + IE_Imp_PasteListener * pPasteListener = new IE_Imp_PasteListener(getDoc(),posEnd, currentDoc);
> > + currentDoc->tellListener(static_cast<PL_Listener *>(pPasteListener));
> > +
> > + DELETEP(pPasteListener);
> > + UNREFP(currentDoc);
> > + g_object_unref(G_OBJECT(itemInput));
> > +
> > + } catch (std::out_of_range e)
> > + {
> > + return UT_ERROR;
> > + }
> > + }
> > +
> > + return UT_OK;
> > +}
> > +
> > +GsfOutput* IE_Imp_EPUB::createFileByPath(const char* path)
> > +{
> > + gchar** components = g_strsplit(path, G_DIR_SEPARATOR_S, 0);
> > + UT_UTF8String curPath = UT_UTF8String(components[0]);
> > +
> > + int current = 0;
> > + GsfOutput* output = NULL;
> > + while (components[current] != NULL)
> > + {
> > + curPath += components[current];
> > + current++;
> > +
> > + char *uri = UT_go_filename_to_uri(curPath.utf8_str());
> > + bool fileExists = UT_go_file_exists(uri);
> > + if (!fileExists && (components[current] != NULL))
> > + {
> > + UT_go_directory_create(uri, 0644, NULL);
> > + } else
> > + {
> > + if (!fileExists)
> > + {
> > + output = UT_go_file_create(uri, NULL);
> > + break;
> > + }
> > + }
> > +
> > + g_free(uri);
> > +
> > + if (components[current] != NULL)
> > + {
> > + curPath += G_DIR_SEPARATOR_S;
> > + }
> > + }
> > +
> > + g_strfreev(components);
> > + return output;
> > +}
> > +
> > +void ContainerListener::startElement(const gchar* name, const gchar** atts)
> > +{
> > + if (!UT_go_utf8_collate_casefold(name, "rootfile"))
> > + {
> > + m_rootFilePath = UT_UTF8String(UT_getAttribute("full-path", atts));
> > + UT_DEBUGMSG(("Found rootfile%s\n", m_rootFilePath.utf8_str()));
> > + }
> > +}
> > +
> > +void ContainerListener::endElement(const gchar* name)
> > +{
> > +}
> > +
> > +void ContainerListener::charData(const gchar* buffer, int length)
> > +{
> > +
> > +}
> > +
> > +UT_UTF8String ContainerListener::getRootFilePath() const
> > +{
> > + return m_rootFilePath;
> > +}
> > +
> > +/*
> > +
> > + */
> > +
> > +OpfListener::OpfListener():
> > + m_inManifest(false)
> > +{
> > +
> > +}
> > +
> > +void OpfListener::startElement(const gchar* name, const gchar** atts)
> > +{
> > + if (!UT_go_utf8_collate_casefold(name, "manifest"))
> > + {
> > + m_inManifest = true;
> > + }
> > +
> > + if (!UT_go_utf8_collate_casefold(name, "spine"))
> > + {
> > + m_inSpine = true;
> > + }
> > +
> > + if (m_inManifest)
> > + {
> > + if (!UT_go_utf8_collate_casefold(name, "item"))
> > + {
> > + m_manifestItems.insert(string_pair(UT_UTF8String(UT_getAttribute("id", atts)),
> > + UT_UTF8String(UT_getAttribute("href", atts))));
> > + UT_DEBUGMSG(("Found manifest item: %s\n", UT_getAttribute("href", atts)));
> > + }
> > + }
> > +
> > + if (m_inSpine)
> > + {
> > + if (!UT_go_utf8_collate_casefold(name, "itemref"))
> > + {
> > + // We can ignore "linear" attribute as it said in specification
> > + m_spine.push_back(UT_UTF8String(UT_getAttribute("idref", atts)));
> > + UT_DEBUGMSG(("Found spine itemref: %s\n", UT_getAttribute("idref", atts)));
> > + }
> > + }
> > +
> > +}
> > +
> > +void OpfListener::endElement(const gchar* name)
> > +{
> > +
> > +}
> > +
> > +void OpfListener::charData(const gchar* buffer, int length)
> > +{
> > +
> > +}
> > +
> > +/*
> > +
> > + */
> > +
> > +void NavigationListener::startElement(const gchar* name, const gchar** atts)
> > +{
> > +
> > +}
> > +
> > +void NavigationListener::endElement(const gchar* name)
> > +{
> > +
> > +}
> > +
> > +void NavigationListener::charData(const gchar* buffer, int length)
> > +{
> > +
> > +}
> > \ No newline at end of file
> >
> > Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h
> > ===================================================================
> > --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h 2011-07-02 13:26:48 UTC (rev 29898)
> > +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB.h 2011-07-02 14:35:43 UTC (rev 29899)
> > @@ -21,28 +21,108 @@
> > #ifndef IE_IMP_EPUB_H_
> > #define IE_IMP_EPUB_H_
> >
> > -#include "ie_imp.h"
> > +#include <gsf/gsf-infile-zip.h>
> > +#include <gsf/gsf-infile.h>
> > +#include <gsf/gsf-libxml.h>
> > +#include <ut_go_file.h>
> > +#include <vector>
> > +#include <map>
> >
> > +
> > +// AbiWord includes
> > +#include <ie_imp.h>
> > +#include <ie_imp_XHTML.h>
> > +#include <ut_xml.h>
> > +#include <ie_imp_PasteListener.h>
> > +
> > #define EPUB_MIMETYPE "application/epub+zip"
> >
> >
> > +typedef std::pair<UT_UTF8String, UT_UTF8String> string_pair;
> > /**
> > * Class used to import EPUB files
> > */
> > class IE_Imp_EPUB : public IE_Imp
> > {
> > public:
> > -
> > IE_Imp_EPUB (PD_Document * pDocument);
> > virtual ~IE_Imp_EPUB ();
> > - virtual bool pasteFromBuffer(PD_DocumentRange * pDocRange,
> > + virtual bool pasteFromBuffer(PD_DocumentRange * pDocRange,
> > const unsigned char * pData,
> > UT_uint32 lenData,
> > const char * szEncoding = 0);
> > -
> > protected:
> > - virtual UT_Error _loadFile(GsfInput * input);
> > + virtual UT_Error _loadFile(GsfInput * input);
> > +
> > +private:
> > + GsfInfile* m_epub;
> > + UT_UTF8String m_rootfilePath;
> > + UT_UTF8String m_tmpDir;
> > + UT_UTF8String m_opsDir;
> > + std::vector<UT_UTF8String> m_spine;
> > + std::map<UT_UTF8String, UT_UTF8String> m_manifestItems;
> > +
> > + UT_Error readMetadata();
> > + UT_Error readPackage();
> > + UT_Error uncompress();
> > + UT_Error readStructure();
> > + static GsfOutput* createFileByPath(const char* path);
> > };
> >
> > +/*
> > + * Listener for parsing container.xml data
> > + */
> > +class ContainerListener : public UT_XML::Listener
> > +{
> > +public:
> > + void startElement (const gchar * name, const gchar ** atts);
> > + void endElement (const gchar * name);
> > + void charData (const gchar * buffer, int length);
> > +
> > + UT_UTF8String getRootFilePath() const;
> > +
> > +private:
> > + UT_UTF8String m_rootFilePath;
> > +};
> > +
> > +/*
> > + * Listener for parsing .opf
> > + */
> > +class OpfListener : public UT_XML::Listener
> > +{
> > +public:
> > + void startElement (const gchar * name, const gchar ** atts);
> > + void endElement (const gchar * name);
> > + void charData (const gchar * buffer, int length);
> > +
> > + std::map<UT_UTF8String, UT_UTF8String> getManifestItems() const { return m_manifestItems; }
> > + std::vector<UT_UTF8String> getSpine() const { return m_spine; }
> > +
> > + OpfListener();
> > +
> > +private:
> > + /* Vector with list of OPS files needed to be imported. Sorted in the linear
> > + * reading order
> > + */
> > + std::vector<UT_UTF8String> m_spine;
> > + /* Map with all files that will be used for import
> > + */
> > + std::map<UT_UTF8String, UT_UTF8String> m_manifestItems;
> > +
> > + bool m_inManifest;
> > + bool m_inSpine;
> > +};
> > +
> > +/*
> > + * Listener for parsing .ncx
> > + */
> > +class NavigationListener : public UT_XML::Listener
> > +{
> > +public:
> > + void startElement (const gchar * name, const gchar ** atts);
> > + void endElement (const gchar * name);
> > + void charData (const gchar * buffer, int length);
> > +};
> > +
> > #endif
> >
> >
> > Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp
> > ===================================================================
> > --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp 2011-07-02 13:26:48 UTC (rev 29898)
> > +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.cpp 2011-07-02 14:35:43 UTC (rev 29899)
> > @@ -39,7 +39,7 @@
> > IE_Imp_EPUB_Sniffer::IE_Imp_EPUB_Sniffer() :
> > IE_ImpSniffer("EPUB::EPUB")
> > {
> > -
> > + UT_DEBUGMSG(("Constructing sniffer\n"));
> > }
> >
> > IE_Imp_EPUB_Sniffer::~IE_Imp_EPUB_Sniffer()
> > @@ -49,16 +49,19 @@
> >
> > const IE_SuffixConfidence * IE_Imp_EPUB_Sniffer::getSuffixConfidence()
> > {
> > + UT_DEBUGMSG(("Recognizing suffixes\n"));
> > return IE_Imp_EPUB_Sniffer_SuffixConfidence;
> > }
> >
> > const IE_MimeConfidence * IE_Imp_EPUB_Sniffer::getMimeConfidence()
> > {
> > - return IE_Imp_EPUB_Sniffer_MimeConfidence;
> > + UT_DEBUGMSG(("Recognizing mime type\n"));
> > + return IE_Imp_EPUB_Sniffer_MimeConfidence;
> > }
> >
> > UT_Confidence_t IE_Imp_EPUB_Sniffer::recognizeContents(GsfInput * input)
> > {
> > + UT_DEBUGMSG(("Recognizing contents\n"));
> > GsfInfile* zip = gsf_infile_zip_new(input, NULL);
> > UT_Confidence_t confidence = UT_CONFIDENCE_ZILCH;
> > if (zip != NULL)
> > @@ -67,19 +70,27 @@
> >
> > if (mimetype != NULL)
> > {
> > - gsf_off_t size = gsf_input_size(mimetype);
> > + UT_DEBUGMSG(("Opened 'mimetype' file\n"));
> > + size_t size = gsf_input_size(mimetype);
> >
> > if (size > 0)
> > {
> > - gchar* mime = (gchar*)gsf_input_read(mimetype, size, NULL);
> > + UT_DEBUGMSG(("Reading 'mimetype' file contents\n"));
> > + gchar* pMime = (gchar*)gsf_input_read(mimetype, size, NULL);
> > + UT_UTF8String mimeStr;
> > + mimeStr.append(pMime, size);
> >
> > - if (!strcmp(mime, EPUB_MIMETYPE))
> > + if (!strcmp(mimeStr.utf8_str(), EPUB_MIMETYPE))
> > {
> > + UT_DEBUGMSG(("RUDYJ: Found EPUB\n"));
> > confidence = UT_CONFIDENCE_PERFECT;
> > - }
> > - g_free(mime);
> > + }
> > }
> > - }
> > +
> > + g_object_unref(G_OBJECT(mimetype));
> > + }
> > +
> > + g_object_unref(G_OBJECT(zip));
> > }
> >
> > return confidence;
> > @@ -88,7 +99,9 @@
> > UT_Error IE_Imp_EPUB_Sniffer::constructImporter(PD_Document * pDocument,
> > IE_Imp ** ppie)
> > {
> > - *ppie = new IE_Imp_EPUB(pDocument);
> > + UT_DEBUGMSG(("Constructing importer\n"));
> > + IE_Imp_EPUB* importer = new IE_Imp_EPUB(pDocument);
> > + *ppie = importer;
> >
> > return UT_OK;
> > }
> >
> > Modified: abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h
> > ===================================================================
> > --- abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h 2011-07-02 13:26:48 UTC (rev 29898)
> > +++ abiword/trunk/plugins/epub/imp/xp/ie_imp_EPUB_Sniffer.h 2011-07-02 14:35:43 UTC (rev 29899)
> > @@ -21,11 +21,12 @@
> > #ifndef IE_IMP_EPUB_SNIFFER_H_
> > #define IE_IMP_EPUB_SNIFFER_H_
> >
> > +#include <gsf/gsf-infile-zip.h>
> > #include <gsf/gsf-infile.h>
> > -#include <gsf/gsf-infile-zip.h>
> > +#include <gsf/gsf-libxml.h>
> >
> > +#include "ie_imp.h"
> > #include "ie_imp_EPUB.h"
> > -#include "ie_imp.h"
> >
> >
> > class IE_Imp_EPUB_Sniffer : public IE_ImpSniffer
> >
> > -----------------------------------------------
> > To unsubscribe from this list, send a message to
> > abisource-cvs-commit-request@abisource.com with the word
> > unsubscribe in the message body.
> >
>
>
>
> --
> "I like to pay taxes. With them, I buy civilization." -- Oliver Wendell Holmes
Received on Sat Jul 2 18:18:55 2011

This archive was generated by hypermail 2.1.8 : Sat Jul 02 2011 - 18:18:55 CEST

Re: volodymyr - r29899 - abiword/trunk/plugins/epub/imp/xp