ODFPY  1.2.0
opendocument.py
Go to the documentation of this file.
1 # -*- coding: utf-8 -*-
2 # Copyright (C) 2006-2010 Søren Roug, European Environment Agency
3 #
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
8 #
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
13 #
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 #
18 # Contributor(s):
19 #
20 # Copyright (C) 2014 Georges Khaznadar <georgesk@debian.org>
21 # migration to Python3, JavaDOC comments and automatic
22 # build of documentation
23 #
24 
25 __doc__="""Use OpenDocument to generate your documents."""
26 
27 import zipfile, time, uuid, sys, mimetypes, copy, os.path
28 
29 # to allow Python3 to access modules in the same path
30 sys.path.append(os.path.dirname(__file__))
31 
32 # using BytesIO provides a cleaner interface than StringIO
33 # with both Python2 and Python3: the programmer must care to
34 # convert strings or unicode to bytes, which is valid for Python 2 and 3.
35 from io import StringIO, BytesIO
36 
37 from odf.namespaces import *
38 import odf.manifest as manifest
39 import odf.meta as meta
40 from odf.office import *
41 import odf.element as element
42 from odf.attrconverters import make_NCName
43 from xml.sax.xmlreader import InputSource
44 from odf.odfmanifest import manifestlist
45 import codecs
46 
# Python 3 has no ``unicode`` builtin, so alias it to ``str``; code below
# (for instance _parseoneelement) calls unicode() on either interpreter.
if sys.version_info[0] == 3:
    unicode = str

# Version string of this module; TOOLSVERSION is defined outside this section.
__version__ = TOOLSVERSION

# XML declaration written first into every XML stream generated below.
_XMLPROLOGUE = u"<?xml version='1.0' encoding='UTF-8'?>\n"
53 
54 
# Value stored in ZipInfo.external_attr for every archive member written by
# this module.  Numerically equal to 0o100644 << 16.
# NOTE(review): presumably "regular file, mode 0644" in the Unix half of the
# zip external attributes -- confirm against the zipfile documentation.
UNIXPERMS = 2175008768

# Tags stored as the first item of each OpenDocument.Pictures record: the
# second item is either a path on disk (IS_FILENAME) or the picture bytes
# themselves (IS_IMAGE).
IS_FILENAME = 0
IS_IMAGE = 1

# We need at least Python 2.2.  Compare (major, minor) as a tuple: testing the
# two fields independently would wrongly reject every X.0 and X.1 release
# (for example Python 3.0 or 3.1).
assert sys.version_info[:2] >= (2, 2)
68 
69 #sys.setrecursionlimit(100)
70 #The recursion limit is set conservative so mistakes like
71 # s=content() s.addElement(s) won't eat up too much processor time.
72 
73 
# Maps every OpenDocument MIME type to the file suffix used by save() when it
# is asked to add one.  All keys share the same vendor prefix, so the table is
# spelled as (document kind, suffix) pairs.
odmimetypes = {
    u'application/vnd.oasis.opendocument.' + _kind: _suffix
    for _kind, _suffix in (
        (u'text', u'.odt'),
        (u'text-template', u'.ott'),
        (u'graphics', u'.odg'),
        (u'graphics-template', u'.otg'),
        (u'presentation', u'.odp'),
        (u'presentation-template', u'.otp'),
        (u'spreadsheet', u'.ods'),
        (u'spreadsheet-template', u'.ots'),
        (u'chart', u'.odc'),
        (u'chart-template', u'.otc'),
        (u'image', u'.odi'),
        (u'image-template', u'.oti'),
        (u'formula', u'.odf'),
        (u'formula-template', u'.otf'),
        (u'text-master', u'.odm'),
        (u'text-web', u'.oth'),
    )
}
94 
95 
100 
def __init__(self, filename, mediatype, content=None):
    """Record one archive member as a plain (filename, mediatype, content) triple.

    filename  -- unicode string
    mediatype -- unicode string
    content   -- bytes, or None when there is no payload
    """
    # Exact-type checks: text fields must be unicode, the payload bytes.
    assert type(filename) is type(u"")
    assert type(mediatype) is type(u"")
    assert content is None or type(content) is type(b"")

    self.filename = filename
    self.mediatype = mediatype
    self.content = content
115 
116 
124  thumbnail = None
125 
126 
def __init__(self, mimetype, add_generator=True):
    """Build an empty document skeleton for the given MIME type.

    mimetype      -- unicode string, stored as self.mimetype and passed to
                     the top-level Document element
    add_generator -- bool; when true a meta generator element carrying
                     TOOLSVERSION is added to the meta section

    The top node receives, in this order: meta, scripts, font face
    declarations, settings, styles, automatic styles, master styles, body.
    """
    assert type(mimetype) is type(u"")
    assert isinstance(add_generator, bool)

    self.mimetype = mimetype
    self.childobjects = []
    self._extra = []
    self.folder = u""  # Always empty for toplevel documents
    self.topnode = Document(mimetype=self.mimetype)
    self.topnode.ownerDocument = self

    self.clear_caches()

    self.Pictures = {}
    self.meta = Meta()
    self.topnode.addElement(self.meta)
    if add_generator:
        self.meta.addElement(meta.Generator(text=TOOLSVERSION))
    self.scripts = Scripts()
    self.topnode.addElement(self.scripts)
    # The four sections below were appended to the top node without ever
    # being assigned; each must be created before it can be added (and
    # before contentxml()/stylesxml()/settingsxml() can read it).
    self.fontfacedecls = FontFaceDecls()
    self.topnode.addElement(self.fontfacedecls)
    self.settings = Settings()
    self.topnode.addElement(self.settings)
    self.styles = Styles()
    self.topnode.addElement(self.styles)
    self.automaticstyles = AutomaticStyles()
    self.topnode.addElement(self.automaticstyles)
    self.masterstyles = MasterStyles()
    self.topnode.addElement(self.masterstyles)
    self.body = Body()
    self.topnode.addElement(self.body)
164 
def rebuild_caches(self, node=None):
    """Re-register a subtree in the caches, depth first.

    node -- root of the subtree; defaults to self.topnode.

    build_caches() is called for the node itself and then, recursively,
    for every child whose nodeType is ELEMENT_NODE.
    """
    start = self.topnode if node is None else node
    self.build_caches(start)
    for child in start.childNodes:
        if child.nodeType != element.Node.ELEMENT_NODE:
            continue
        self.rebuild_caches(child)
171 
172 
def clear_caches(self):
    """Reset the three lookup caches to fresh, independent empty dicts.

    element_dict    -- filled by build_caches(), keyed by element qname
    _styles_dict    -- filled by __register_stylename(), keyed by style name
    _styles_ooo_fix -- old style name -> renamed style name
    """
    self.element_dict, self._styles_dict, self._styles_ooo_fix = {}, {}, {}
180 
181 
def build_caches(self, elt):
    """Register one element in the caches.

    elt -- an element.Element instance

    The element is appended to element_dict under its qname.  A
    style:style element is additionally passed to __register_stylename().
    Finally, if the element's text:style-name refers to a style that was
    renamed, the attribute is rewritten to the new name.
    """
    # Under Python 3 the element type was seen as odf.element.Element in
    # one test, hence the check against both spellings of the class.
    import odf.element
    assert isinstance(elt, (element.Element, odf.element.Element))

    self.element_dict.setdefault(elt.qname, []).append(elt)
    if elt.qname == (STYLENS, u'style'):
        self.__register_stylename(elt)  # Add to style dictionary

    styleref = elt.getAttrNS(TEXTNS, u'style-name')
    if styleref is None or styleref not in self._styles_ooo_fix:
        return
    elt.setAttrNS(TEXTNS, u'style-name', self._styles_ooo_fix[styleref])
202 
203 
def __register_stylename(self, elt):
    """Record a style element in _styles_dict under its style:name.

    elt -- an element.Element instance

    Elements without a style:name, or whose parent is neither
    office:styles nor office:automatic-styles, are ignored.  When the name
    is already registered the element is renamed to u'M' + name and the
    mapping old -> new is remembered in _styles_ooo_fix.
    """
    assert isinstance(elt, element.Element)

    name = elt.getAttrNS(STYLENS, u'name')
    if name is None:
        return

    style_containers = ((OFFICENS, u'styles'), (OFFICENS, u'automatic-styles'))
    if elt.parentNode.qname not in style_containers:
        return

    if name in self._styles_dict:
        renamed = u'M' + name  # Rename style
        self._styles_ooo_fix[name] = renamed
        # From here on all references to the old name will refer to the new one
        name = renamed
        elt.setAttrNS(STYLENS, u'name', name)
    self._styles_dict[name] = elt
224 
225 
def toXml(self, filename=u''):
    """Serialise the document body, preceded by the XML prologue.

    filename -- unicode string; when empty the XML text is returned,
                otherwise it is written to that file as UTF-8

    Returns the XML as a unicode string when no filename is given, and
    None when the text was written to a file.
    """
    assert type(filename) is type(u"")

    xml = StringIO()
    xml.write(_XMLPROLOGUE)
    self.body.toXml(0, xml)
    if not filename:
        return xml.getvalue()

    # The context manager closes the file even when write() raises.
    with codecs.open(filename, 'w', encoding='utf-8') as f:
        f.write(xml.getvalue())
    return None
252 
253 
def xml(self):
    """Serialise the whole document (self.topnode) as one XML stream.

    The generator entry in the meta section is refreshed first.  Returns
    the XML, prologue included, encoded as UTF-8 bytes.
    """
    self.__replaceGenerator()
    buf = StringIO()
    buf.write(_XMLPROLOGUE)
    self.topnode.toXml(0, buf)
    text = buf.getvalue()
    return text.encode("utf-8")
267 
268 
269 
def contentxml(self):
    """Build the content.xml part and return it as UTF-8 bytes.

    Inside the DocumentContent wrapper the part holds: scripts and font
    face declarations (only when they have children), the automatic styles
    referenced from styles, automatic styles or body, and the body.
    """
    out = StringIO()
    out.write(_XMLPROLOGUE)
    root = DocumentContent()
    root.write_open_tag(0, out)

    for section in (self.scripts, self.fontfacedecls):
        if section.hasChildNodes():
            section.toXml(1, out)

    auto = AutomaticStyles()
    used = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
    if len(used) == 0:
        # Nothing referenced: serialise the fresh, empty wrapper as is.
        auto.toXml(1, out)
    else:
        auto.write_open_tag(1, out)
        for style in used:
            style.toXml(2, out)
        auto.write_close_tag(1, out)

    self.body.toXml(1, out)
    root.write_close_tag(0, out)
    return out.getvalue().encode("utf-8")
295 
296 
def __manifestxml(self):
    """Serialise self.manifest and return the XML as a unicode string.

    self.manifest is created by __zipwrite() and deleted again at its end,
    so this only works while a save is in progress.
    """
    buf = StringIO()
    buf.write(_XMLPROLOGUE)
    self.manifest.toXml(0, buf)
    text = buf.getvalue()
    assert type(text) is type(u"")
    return text
309 
310 
def metaxml(self):
    """Build the meta.xml part and return it as a unicode string.

    The generator entry is refreshed first, then self.meta is added to a
    new DocumentMeta wrapper which is serialised after the XML prologue.
    """
    self.__replaceGenerator()
    wrapper = DocumentMeta()
    wrapper.addElement(self.meta)

    buf = StringIO()
    buf.write(_XMLPROLOGUE)
    wrapper.toXml(0, buf)

    text = buf.getvalue()
    assert type(text) is type(u"")
    return text
325 
326 
def settingsxml(self):
    """Build the settings.xml part and return it as a unicode string.

    self.settings is added to a new DocumentSettings wrapper, which is
    serialised after the XML prologue.
    """
    wrapper = DocumentSettings()
    wrapper.addElement(self.settings)

    buf = StringIO()
    # Both interpreter versions write the same prologue.
    buf.write(_XMLPROLOGUE)
    wrapper.toXml(0, buf)

    text = buf.getvalue()
    assert type(text) is type(u"")
    return text
343 
344 
def _parseoneelement(self, top, stylenamelist):
    """Collect the style names referenced anywhere below an element.

    top           -- node whose descendants are scanned (top itself is not)
    stylenamelist -- list of names found so far; extended in place

    Returns the same list.  Only children whose nodeType is ELEMENT_NODE
    are inspected and descended into.
    """
    # The attributes that can carry a style reference.  Built once per
    # call instead of once per visited element.
    stylerefs = (
        (CHARTNS, u'style-name'),
        (DRAWNS, u'style-name'),
        (DRAWNS, u'text-style-name'),
        (PRESENTATIONNS, u'style-name'),
        (STYLENS, u'data-style-name'),
        (STYLENS, u'list-style-name'),
        (STYLENS, u'page-layout-name'),
        (STYLENS, u'style-name'),
        (TABLENS, u'default-cell-style-name'),
        (TABLENS, u'style-name'),
        (TEXTNS, u'style-name'),
    )
    for e in top.childNodes:
        if e.nodeType != element.Node.ELEMENT_NODE:
            continue
        for namespace, attr in stylerefs:
            # Look the attribute up once rather than twice.
            stylename = e.getAttrNS(namespace, attr)
            if stylename and stylename not in stylenamelist:
                # due to the polymorphism of e.getAttrNS(),
                # a unicode type is enforced for elements
                stylenamelist.append(unicode(stylename))
        stylenamelist = self._parseoneelement(e, stylenamelist)
    return stylenamelist
374 
375 
def _used_auto_styles(self, segments):
    """Return the automatic styles that are referenced from the segments.

    segments -- iterable of nodes, each scanned with _parseoneelement()

    Returns the children of self.automaticstyles that are elements and
    whose style:name is among the referenced names, in document order.
    """
    referenced = []
    for segment in segments:
        referenced = self._parseoneelement(segment, referenced)

    stylelist = [
        candidate
        for candidate in self.automaticstyles.childNodes
        if isinstance(candidate, element.Element)
        and candidate.getAttrNS(STYLENS, u'name') in referenced
    ]

    # check the type of the returned data
    assert all(isinstance(style, element.Element) for style in stylelist)

    return stylelist
397 
398 
def stylesxml(self):
    """Build the styles.xml part and return it as a unicode string.

    Inside the DocumentStyles wrapper the part holds: the font face
    declarations (when non-empty), the styles, the automatic styles
    referenced from the master styles, and the master styles (when
    non-empty).
    """
    out = StringIO()
    out.write(_XMLPROLOGUE)
    root = DocumentStyles()
    root.write_open_tag(0, out)

    if self.fontfacedecls.hasChildNodes():
        self.fontfacedecls.toXml(1, out)
    self.styles.toXml(1, out)

    # The automatic-styles wrapper is always written, even when empty.
    auto = AutomaticStyles()
    auto.write_open_tag(1, out)
    for style in self._used_auto_styles([self.masterstyles]):
        style.toXml(2, out)
    auto.write_close_tag(1, out)

    if self.masterstyles.hasChildNodes():
        self.masterstyles.toXml(1, out)
    root.write_close_tag(0, out)

    text = out.getvalue()
    assert type(text) is type(u"")
    return text
424 
425 
def addPicture(self, filename, mediatype=None, content=None):
    """Register a picture and return the name it gets inside the archive.

    filename  -- with content=None: path of the picture file on disk;
                 otherwise the archive name to use (unicode string)
    mediatype -- MIME type; guessed from filename when None and no
                 content is given
    content   -- picture data as bytes, or None to reference the file

    Without content the picture is stored as an IS_FILENAME record under
    a generated u"Pictures/<UUID><ext>" name; with content it is stored
    as an IS_IMAGE record under filename itself.
    """
    if content is not None:
        # Validate before touching self.Pictures so a rejected call
        # leaves no half-registered entry behind.
        assert type(filename) is type(u"")
        assert type(content) is type(b"")
        self.Pictures[filename] = (IS_IMAGE, content, mediatype)
        return filename

    if mediatype is None:
        mediatype, encoding = mimetypes.guess_type(filename)

    ext = None
    if mediatype is None:
        mediatype = u''
    else:
        # guess_extension() returns None for unknown types; fall back to
        # the file's own suffix instead of formatting "None" into the name.
        ext = mimetypes.guess_extension(mediatype)
    if ext is None:
        try:
            ext = filename[filename.rindex(u'.'):]
        except ValueError:  # no dot in the file name
            ext = u''

    manifestfn = u"Pictures/%s%s" % (uuid.uuid4().hex.upper(), ext)
    self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
    return manifestfn
459 
460 
def addPictureFromFile(self, filename, mediatype=None):
    """Register a picture file and return its name inside the archive.

    filename  -- path of the picture file on disk (unicode string)
    mediatype -- MIME type (unicode string); guessed from filename when
                 None, and u'' when it cannot be guessed

    The picture is stored as an IS_FILENAME record under a generated
    u"Pictures/<UUID><ext>" name, which is returned.
    """
    assert type(filename) is type(u"")

    if mediatype is None:
        mediatype, encoding = mimetypes.guess_type(filename)

    ext = None
    if mediatype is None:
        mediatype = u''
    else:
        # guess_extension() returns None for unknown types; fall back to
        # the file's own suffix instead of formatting "None" into the name.
        ext = mimetypes.guess_extension(mediatype)
    if ext is None:
        try:
            ext = filename[filename.rindex(u'.'):]
        except ValueError:  # no dot in the file name
            ext = u''

    # Checked before the record is stored, so a rejected call leaves
    # self.Pictures untouched.
    assert type(mediatype) is type(u"")

    manifestfn = u"Pictures/%s%s" % (uuid.uuid4().hex.upper(), ext)
    self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
    return manifestfn
487 
488 
def addPictureFromString(self, content, mediatype):
    """Register in-memory picture data and return its archive name.

    content   -- picture data as bytes
    mediatype -- MIME type as a unicode string

    The picture is stored as an IS_IMAGE record under a generated
    u"Pictures/<UUID><ext>" name, which is returned.
    """
    assert type(content) is type(b"")
    assert type(mediatype) is type(u"")

    # guess_extension() returns None for unknown types; use no suffix then
    # rather than formatting "None" into the archive name.
    ext = mimetypes.guess_extension(mediatype) or u''
    manifestfn = u"Pictures/%s%s" % (uuid.uuid4().hex.upper(), ext)
    self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
    return manifestfn
507 
508 
def addThumbnail(self, filecontent=None):
    """Set the thumbnail that __zipwrite() stores in the archive.

    filecontent -- thumbnail data as bytes, or None to use the one
                   returned by thumbnail.thumbnail()
    """
    # None is the documented default and must pass the type check,
    # otherwise the fallback branch below can never run.
    assert filecontent is None or type(filecontent) is type(b"")

    if filecontent is None:
        import thumbnail
        self.thumbnail = thumbnail.thumbnail()
    else:
        self.thumbnail = filecontent
522 
523 
def addObject(self, document, objectname=None):
    """Attach a subdocument and return the reference to its folder.

    document   -- the OpenDocument to attach
    objectname -- folder to assign to it (unicode string); when None the
                  folder is u"<self.folder>/Object <n>", n being the new
                  number of child objects

    Returns u"." followed by the subdocument's folder.
    """
    assert isinstance(document, OpenDocument)
    assert objectname is None or type(objectname) is type(u"")

    self.childobjects.append(document)
    document.folder = (
        u"%s/Object %d" % (self.folder, len(self.childobjects))
        if objectname is None
        else objectname
    )
    return u".%s" % document.folder
541 
542 
def _savePictures(self, anObject, folder):
    """Write the pictures of a document, and of its subobjects, to the zip.

    anObject -- the OpenDocument whose Pictures are written
    folder   -- unicode prefix of that document inside the archive
                (u"" for the top-level document)

    Every picture gets a manifest entry for folder + arcname and is
    stored uncompressed under that same path.  Subobjects are handled
    recursively under u"<folder>Object <n>/".
    """
    assert isinstance(anObject, OpenDocument)
    assert type(folder) is type(u"")

    for arcname, picturerec in anObject.Pictures.items():
        what_it_is, fileobj, mediatype = picturerec
        self.manifest.addElement(manifest.FileEntry(fullpath=u"%s%s" % (folder, arcname), mediatype=mediatype))
        if what_it_is == IS_FILENAME:
            self._z.write(fileobj, folder + arcname, zipfile.ZIP_STORED)
        else:
            # Use the same folder-qualified path as the manifest entry and
            # the file-based branch; without the prefix, pictures of
            # subobjects would land at the archive root.
            zi = zipfile.ZipInfo(str(folder + arcname), self._now)
            zi.compress_type = zipfile.ZIP_STORED
            zi.external_attr = UNIXPERMS
            self._z.writestr(zi, fileobj)
    # According to section 17.7.3 in ODF 1.1, the pictures folder should
    # not have a manifest entry, so none is written for "Pictures/".

    # Look in subobjects
    for subobjectnum, subobject in enumerate(anObject.childobjects, 1):
        self._savePictures(subobject, u'%sObject %d/' % (folder, subobjectnum))
572 
573 
def __replaceGenerator(self):
    """Make TOOLSVERSION the only generator entry of the meta section.

    Every child of self.meta whose qname is meta:generator is removed,
    then a new Generator element carrying TOOLSVERSION is added.
    """
    # Collect first, remove afterwards, so the child list is not modified
    # while it is being walked.
    stale = [node for node in self.meta.childNodes if node.qname == (METANS, u'generator')]
    for node in stale:
        self.meta.removeChild(node)
    self.meta.addElement(meta.Generator(text=TOOLSVERSION))
585 
586 
def save(self, outputfile, addsuffix=False):
    """Write the document as a zip archive.

    outputfile -- file name, or u'-' to write to standard output
    addsuffix  -- when true, the suffix matching self.mimetype in
                  odmimetypes (u'.xxx' if unknown) is appended to the name
    """
    if outputfile == u'-':
        # A zip archive is binary: on Python 3 write to the underlying
        # byte stream, sys.stdout itself only accepts text there.
        target = getattr(sys.stdout, "buffer", sys.stdout)
    else:
        if addsuffix:
            outputfile = outputfile + odmimetypes.get(self.mimetype, u'.xxx')
        target = outputfile

    outputfp = zipfile.ZipFile(target, "w")
    try:
        self.__zipwrite(outputfp)
    finally:
        # Always release the archive, also when writing fails.
        outputfp.close()
605 
606 
def write(self, outputfp):
    """Write the document as a zip archive to an already open file object.

    outputfp -- writable binary file object; it is left open for the
                caller (ZipFile.close() only closes files it opened itself)
    """
    zipoutputfp = zipfile.ZipFile(outputfp, "w")
    try:
        self.__zipwrite(zipoutputfp)
    finally:
        # close() is what writes the zip central directory; without it
        # the archive is only completed if and when the ZipFile object
        # happens to be garbage collected.
        zipoutputfp.close()
615 
616 
def __zipwrite(self, outputfp):
    """Write every part of the document into an open ZipFile.

    outputfp -- the zipfile.ZipFile to fill; it is not closed here

    Written in this order: the uncompressed 'mimetype' member, the XML
    parts, the pictures, the thumbnail (if any), the extra opaque files
    and finally META-INF/manifest.xml.  self._z, self._now and
    self.manifest only exist for the duration of the call.
    """
    assert isinstance(outputfp, zipfile.ZipFile)

    self._z = outputfp
    self._now = time.localtime()[:6]
    self.manifest = manifest.Manifest()

    def member(name, compression):
        # ZipInfo for one archive member, stamped with the save time.
        info = zipfile.ZipInfo(name, self._now)
        info.compress_type = compression
        info.external_attr = UNIXPERMS
        return info

    # Write mimetype
    self._z.writestr(member('mimetype', zipfile.ZIP_STORED), self.mimetype.encode("utf-8"))

    self._saveXmlObjects(self, u"")

    # Write pictures
    self._savePictures(self, u"")

    # Write the thumbnail
    if self.thumbnail is not None:
        for path in (u"Thumbnails/", u"Thumbnails/thumbnail.png"):
            self.manifest.addElement(manifest.FileEntry(fullpath=path, mediatype=u''))
        self._z.writestr(member(u"Thumbnails/thumbnail.png", zipfile.ZIP_DEFLATED), self.thumbnail)

    # Write any extra files
    on_python3 = sys.version_info[0] == 3
    for op in self._extra:
        if op.filename == u"META-INF/documentsignatures.xml":
            continue  # Don't save signatures
        self.manifest.addElement(manifest.FileEntry(fullpath=op.filename, mediatype=op.mediatype))
        arcname = op.filename if on_python3 else op.filename.encode('utf-8')
        info = member(arcname, zipfile.ZIP_DEFLATED)
        # Entries without content only get their manifest line.
        if op.content is not None:
            self._z.writestr(info, op.content)

    # Write manifest
    self._z.writestr(member(u"META-INF/manifest.xml", zipfile.ZIP_DEFLATED), self.__manifestxml())

    del self._z
    del self._now
    del self.manifest
669 
670 
671 
def _saveXmlObjects(self, anObject, folder):
    """Write the XML parts of a document, and of its subobjects, to the zip.

    anObject -- the OpenDocument whose parts are written
    folder   -- unicode prefix of that document inside the archive
                (u"" for the top-level document)

    styles.xml and content.xml are always written, settings.xml only when
    the settings have children, meta.xml only for the document being
    saved itself.  Each part gets a text/xml manifest entry.  Subobjects
    are handled recursively under u"<folder>Object <n>/".
    """
    assert isinstance(anObject, OpenDocument)
    assert type(folder) is type(u"")

    is_toplevel = (self == anObject)

    def store(path, produce):
        # Manifest entry first, then the deflated member; the payload is
        # only produced at the moment it is written.
        self.manifest.addElement(manifest.FileEntry(fullpath=path, mediatype=u"text/xml"))
        info = zipfile.ZipInfo(path, self._now)
        info.compress_type = zipfile.ZIP_DEFLATED
        info.external_attr = UNIXPERMS
        self._z.writestr(info, produce())

    # The document itself is listed as "/" at top level, else by folder.
    if is_toplevel:
        self.manifest.addElement(manifest.FileEntry(fullpath=u"/", mediatype=anObject.mimetype))
    else:
        self.manifest.addElement(manifest.FileEntry(fullpath=folder, mediatype=anObject.mimetype))

    # Write styles
    store(u"%sstyles.xml" % folder, lambda: anObject.stylesxml().encode("utf-8"))

    # Write content (contentxml() already returns bytes)
    store(u"%scontent.xml" % folder, lambda: anObject.contentxml())

    # Write settings
    if anObject.settings.hasChildNodes():
        store(u"%ssettings.xml" % folder, lambda: anObject.settingsxml().encode("utf-8"))

    # Write meta
    if is_toplevel:
        store(u"meta.xml", lambda: anObject.metaxml().encode("utf-8"))

    # Write subobjects
    for position, subobject in enumerate(anObject.childobjects):
        self._saveXmlObjects(subobject, u'%sObject %d/' % (folder, position + 1))
720 
721 # Document's DOM methods
722 
def createElement(self, elt):
    """Create an element by calling its factory with grammar checks off.

    elt -- a callable accepting check_grammar=..., the same kind of
           argument getElementsByType() takes

    Returns whatever the factory returns.
    """
    # elt is *called* below, so it has to be a factory, not an already
    # built element: requiring an Element instance here (as the previous
    # assertion did) contradicts the call.
    assert callable(elt)

    return elt(check_grammar=False)
737 
738 
def createTextNode(self, data):
    """Wrap a unicode string in a new element.Text node and return it.

    data -- the text, as a unicode string
    """
    assert type(data) is type(u"")

    node = element.Text(data)
    return node
748 
749 
def createCDATASection(self, data):
    """Wrap a unicode string in a new element.CDATASection and return it.

    data -- the section content, as a unicode string
    """
    assert type(data) is type(u"")

    # The parameter is called ``data``; the previous ``cdata`` was an
    # undefined name and raised NameError on every call.
    return element.CDATASection(data)
759 
760 
def getMediaType(self):
    """Return the document's MIME type (self.mimetype, a unicode string)."""
    mediatype = self.mimetype
    assert type(mediatype) is type(u"")
    return mediatype
769 
770 
def getStyleByName(self, name):
    """Look a style element up by name.

    name -- style name as a unicode string; it is converted with
            make_NCName() before the lookup

    Returns the registered element, or None when no style has that name.
    The caches are rebuilt first if the style cache is empty.
    """
    assert type(name) is type(u"")

    ncname = make_NCName(name)
    if self._styles_dict == {}:
        self.rebuild_caches()
    result = self._styles_dict.get(ncname, None)

    # A miss is a legitimate outcome of a lookup, so only a hit is
    # type-checked; asserting unconditionally made every miss fail.
    assert result is None or isinstance(result, element.Element)
    return result
786 
787 
def getElementsByType(self, elt):
    """Return the cached elements that have the qname of a given factory.

    elt -- an element factory function (from text.py, draw.py, ...); it
           is called once, with check_grammar=False, to learn its qname

    Returns the list stored in element_dict for that qname, or a new
    empty list.  The caches are rebuilt first if element_dict is empty.
    """
    import types
    assert isinstance(elt, types.FunctionType)

    probe = elt(check_grammar=False)
    assert isinstance(probe, element.Element)

    if self.element_dict == {}:
        self.rebuild_caches()

    found = self.element_dict.get(probe.qname, [])

    # check the type of the returned data
    assert all(isinstance(item, element.Element) for item in found)

    return found
816 
817 # Convenience functions
818 
824  doc = OpenDocument(u'application/vnd.oasis.opendocument.chart')
825  doc.chart = Chart()
826  doc.body.addElement(doc.chart)
827  return doc
828 
829 
835  doc = OpenDocument(u'application/vnd.oasis.opendocument.graphics')
836  doc.drawing = Drawing()
837  doc.body.addElement(doc.drawing)
838  return doc
839 
840 
846  doc = OpenDocument(u'application/vnd.oasis.opendocument.image')
847  doc.image = Image()
848  doc.body.addElement(doc.image)
849  return doc
850 
851 
857  doc = OpenDocument(u'application/vnd.oasis.opendocument.presentation')
858  doc.presentation = Presentation()
859  doc.body.addElement(doc.presentation)
860  return doc
861 
862 
868  doc = OpenDocument(u'application/vnd.oasis.opendocument.spreadsheet')
869  doc.spreadsheet = Spreadsheet()
870  doc.body.addElement(doc.spreadsheet)
871  return doc
872 
873 
879  doc = OpenDocument(u'application/vnd.oasis.opendocument.text')
880  doc.text = Text()
881  doc.body.addElement(doc.text)
882  return doc
883 
884 
890  doc = OpenDocument(u'application/vnd.oasis.opendocument.text-master')
891  doc.text = Text()
892  doc.body.addElement(doc.text)
893  return doc
894 
895 
def __loadxmlparts(z, manifest, doc, objectpath):
    """Parse the XML parts found under objectpath into a document.

    z          -- the open zipfile.ZipFile
    manifest   -- dict of manifest entries; parts it does not list are skipped
    doc        -- the OpenDocument receiving the parsed content
    objectpath -- unicode prefix of the parts inside the archive

    settings.xml, meta.xml, content.xml and styles.xml are tried in that
    order.  A KeyError while handling a part is ignored; a
    SAXParseException is reported with print() and the part is skipped.
    """
    assert isinstance(z, zipfile.ZipFile)
    assert type(manifest) is type(dict())
    assert isinstance(doc, OpenDocument)
    assert type(objectpath) is type(u"")

    from odf.load import LoadParser
    from defusedxml.sax import make_parser
    from xml.sax import handler
    from xml.sax._exceptions import SAXParseException

    part_names = (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml')
    for xmlfile in [objectpath + part for part in part_names]:
        if xmlfile not in manifest:
            continue

        try:
            xmlpart = z.read(xmlfile).decode("utf-8")
            doc._parsing = xmlfile

            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, 1)
            parser.setFeature(handler.feature_external_ges, 0)
            parser.setContentHandler(LoadParser(doc))
            parser.setErrorHandler(handler.ErrorHandler())

            inpsrc = InputSource()

            # Declare the namespace prefixes the part may use without
            # having declared them, before handing it to the parser.
            xmlpart = __fixXmlPart(xmlpart)

            inpsrc.setByteStream(BytesIO(xmlpart.encode("utf-8")))
            parser.parse(inpsrc)
            del doc._parsing
        except KeyError:
            pass
        except SAXParseException:
            print (u"====== SAX FAILED TO PARSE ==========\n", xmlpart)
945 
946 
def __fixXmlPart(xmlpart):
    """Add declarations for commonly used namespace prefixes that are missing.

    xmlpart -- the XML text, as a unicode string

    For each of the prefixes meta, config, dc, style, svg, fo, draw, table
    and form that the input does not declare, a declaration of the form
    xmlns:<p>="urn:oasis:names:tc:opendocument:xmlns:<p>:1.0" is inserted
    in front of the first existing " xmlns:" declaration.  Text without
    any " xmlns:" declaration is returned unchanged.
    """
    requestedPrefixes = (u'meta', u'config', u'dc', u'style',
                         u'svg', u'fo', u'draw', u'table', u'form')
    result = xmlpart
    for prefix in requestedPrefixes:
        # Presence is tested against the original text, not the result.
        if u' xmlns:{prefix}'.format(prefix=prefix) in xmlpart:
            continue
        pos = result.find(u" xmlns:")
        if pos < 0:
            # No declaration to anchor the insertion to.
            continue
        toInsert = u' xmlns:{prefix}="urn:oasis:names:tc:opendocument:xmlns:{prefix}:1.0"'.format(prefix=prefix)
        result = result[:pos] + toInsert + result[pos:]
    return result
973 
974 
975 
def __detectmimetype(zipfd, odffile):
    """Determine the MIME type of an open ODF archive.

    zipfd   -- the open zipfile.ZipFile
    odffile -- not used by this function

    Tried in order: the 'mimetype' member decoded as UTF-8, then the
    media type of the "/" entry of META-INF/manifest.xml, and finally the
    text document type as a default.
    """
    assert isinstance(zipfd, zipfile.ZipFile)

    try:
        return zipfd.read('mimetype').decode("utf-8")
    except Exception:
        # Best effort: a missing or unreadable member just means the next
        # mechanism is tried.  Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit through.
        pass

    # Fall-through to next mechanism
    manifestpart = zipfd.read('META-INF/manifest.xml')
    manifest = manifestlist(manifestpart)
    for mentry, mvalue in manifest.items():
        if mentry == "/":
            assert type(mvalue['media-type']) is type(u"")
            return mvalue['media-type']

    # Fall-through to last mechanism
    return u'application/vnd.oasis.opendocument.text'
1000 
1001 
def load(odffile):
    """Load an ODF file into memory and return it as an OpenDocument.

    odffile -- anything zipfile.ZipFile accepts (file name or file object)

    Besides the XML parts, top-level pictures, the thumbnail and
    subobjects named "Object <n>/" are loaded; every other manifest entry
    is kept as an OpaqueObject in doc._extra.  The first child of the body
    is then exposed under an attribute named after the document kind
    (doc.text, doc.spreadsheet, ...).
    """
    z = zipfile.ZipFile(odffile)
    try:
        mimetype = __detectmimetype(z, odffile)
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four XML parts exist
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        __loadxmlparts(z, manifest, doc, u'')
        for mentry, mvalue in manifest.items():
            if mentry.startswith(u"Pictures/") and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
            elif mentry == u"Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml'):
                pass
            # Load subobjects into structure
            elif mentry.startswith(u"Object ") and len(mentry) < 11 and mentry.endswith(u"/"):
                subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
                doc.addObject(subdoc, u"/" + mentry[:-1])
                __loadxmlparts(z, manifest, subdoc, mentry)
            elif mentry.startswith(u"Object "):
                pass  # Don't load subobjects as opaque objects
            else:
                # endswith() also copes with an empty full-path, where
                # indexing [-1] would raise IndexError.
                if mvalue['full-path'].endswith(u'/'):
                    doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
                else:
                    doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], z.read(mentry)))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        # Release the archive even when reading or parsing fails.
        z.close()

    b = doc.getElementsByType(Body)
    # MIME type prefix -> attribute(s) that receive the body's first child.
    # Prefix matching makes template and master variants share the entry
    # of their base type.  Graphics documents keep the historical
    # ``graphics`` attribute and also get ``drawing``, the name used when a
    # drawing document is created from scratch in this module.
    shortcuts = (
        (u'application/vnd.oasis.opendocument.text', (u'text',)),
        (u'application/vnd.oasis.opendocument.graphics', (u'graphics', u'drawing')),
        (u'application/vnd.oasis.opendocument.presentation', (u'presentation',)),
        (u'application/vnd.oasis.opendocument.spreadsheet', (u'spreadsheet',)),
        (u'application/vnd.oasis.opendocument.chart', (u'chart',)),
        (u'application/vnd.oasis.opendocument.image', (u'image',)),
        (u'application/vnd.oasis.opendocument.formula', (u'formula',)),
    )
    for prefix, attrnames in shortcuts:
        if mimetype.startswith(prefix):
            content_root = b[0].firstChild
            for attrname in attrnames:
                setattr(doc, str(attrname), content_root)
            break

    return doc
1056 
1057 # vim: set expandtab sw=4 :
def addObject(self, document, objectname=None)
Adds an object (subdocument).
def FontFaceDecls(args)
Definition: office.py:71
just a record to bear a filename, a mediatype and a bytes content
Definition: opendocument.py:99
def DocumentSettings(version="1.2", args)
Definition: office.py:59
def OpenDocumentDrawing()
Creates a drawing document.
def createTextNode(self, data)
Method to create a text node.
def Image(args)
Definition: draw.py:125
def Document(version="1.2", args)
Definition: office.py:50
A class to hold the content of an OpenDocument document Use the xml method to write the XML source to...
def OpenDocumentSpreadsheet()
Creates a spreadsheet document.
def addPicture(self, filename, mediatype=None, content=None)
Add a picture It uses the same convention as OOo, in that it saves the picture in the zipfile in the ...
def Presentation(args)
Definition: office.py:86
def toXml(self, filename=u'')
converts the document to a valid Xml format.
def OpenDocumentPresentation()
Creates a presentation document.
def MasterStyles(args)
Definition: office.py:80
def metaxml(self)
Generates the meta.xml file.
def contentxml(self)
Generates the content.xml file.
def _saveXmlObjects(self, anObject, folder)
save xml objects of an opendocument to some folder
def addPictureFromString(self, content, mediatype)
Add a picture from contents given as a Byte string.
def __zipwrite(self, outputfp)
Write the document to an open file pointer This is where the real work is done.
def getStyleByName(self, name)
Finds a style object based on the name.
def __register_stylename(self, elt)
Register a style.
def rebuild_caches(self, node=None)
def AutomaticStyles(args)
Definition: office.py:32
def stylesxml(self)
Generates the styles.xml file.
def OpenDocumentImage()
Creates an image document.
def addThumbnail(self, filecontent=None)
Add a fixed thumbnail The thumbnail in the library is big, so this is pretty useless.
def save(self, outputfile, addsuffix=False)
Save the document under the filename.
def load(odffile)
Load an ODF file into memory.
def clear_caches(self)
Clears internal caches.
def DocumentStyles(version="1.2", args)
Definition: office.py:62
def DocumentMeta(version="1.2", args)
Definition: office.py:56
def Chart(args)
Definition: chart.py:31
def __manifestxml(self)
Generates the manifest.xml file; self.manifest isn't available unless the document is being saved...
def write(self, outputfp)
User API to write the ODF file to an open file descriptor Writes the ZIP format.
def addPictureFromFile(self, filename, mediatype=None)
Add a picture It uses the same convention as OOo, in that it saves the picture in the zipfile in the ...
def build_caches(self, elt)
Builds internal caches; called from element.py.
def OpenDocumentText()
Creates a text document.
def OpenDocumentTextMaster()
Creates a text master document.
def getMediaType(self)
Returns the media type.
def _savePictures(self, anObject, folder)
saves pictures contained in an object
def OpenDocumentChart()
Creates a chart document.
def __replaceGenerator(self)
Removes any previous 'generator' element and declares TOOLSVERSION as the new generator.
def Settings(args)
Definition: office.py:95
def manifestlist(manifestxml)
Definition: odfmanifest.py:96
def Drawing(args)
Definition: office.py:65
def Styles(args)
Definition: office.py:101
def __init__(self, filename, mediatype, content=None)
the constructor
def _used_auto_styles(self, segments)
Loop through the masterstyles elements, and find the automatic styles that are used.
def _parseoneelement(self, top, stylenamelist)
Finds references to style objects in master-styles and add the style name to the style list if not al...
Definition: meta.py:1
def createCDATASection(self, data)
Method to create a CDATA section.
def xml(self)
Generates the full document as an XML "file".
Creates a arbitrary element and is intended to be subclassed not used on its own. ...
Definition: element.py:357
Definition: load.py:1
def Spreadsheet(args)
Definition: office.py:98
def Text(args)
Definition: form.py:104
def createElement(self, elt)
Inconvenient interface to create an element, but follows XML-DOM.
def __init__(self, mimetype, add_generator=True)
the constructor
def Scripts(args)
Definition: office.py:92
def Body(args)
Definition: office.py:38
def getElementsByType(self, elt)
Gets elements based on the type, which is function from text.py, draw.py etc.
def Meta(args)
Definition: office.py:83
def settingsxml(self)
Generates the settings.xml file.
def DocumentContent(version="1.2", args)
Definition: office.py:53