From cbf8a2a65928694202e19b6bcf56ec84bcbf613c Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Sat, 8 Dec 2012 02:02:25 +0100
Subject: Reorganize source tree and files installation location, cleanup
 setup.py (Closes: #689409)

---
 lib/office.py | 265 ----------------------------------------------------------
 1 file changed, 265 deletions(-)
 delete mode 100644 lib/office.py

(limited to 'lib/office.py')

diff --git a/lib/office.py b/lib/office.py
deleted file mode 100644
index d14125b..0000000
--- a/lib/office.py
+++ /dev/null
@@ -1,265 +0,0 @@
-'''
-    Care about office's formats
-'''
-
-import os
-import logging
-import zipfile
-import fileinput
-import xml.dom.minidom as minidom
-
-try:
-    import cairo
-    import poppler
-except ImportError:
-    pass
-
-import mat
-import parser
-import archive
-
-
-class OpenDocumentStripper(archive.GenericArchiveStripper):
-    '''
-        An open document file is a zip, with xml file into.
-        The one that interest us is meta.xml
-    '''
-
-    def get_meta(self):
-        '''
-            Return a dict with all the meta of the file by
-            trying to read the meta.xml file.
-        '''
-        zipin = zipfile.ZipFile(self.filename, 'r')
-        metadata = {}
-        try:
-            content = zipin.read('meta.xml')
-            dom1 = minidom.parseString(content)
-            elements = dom1.getElementsByTagName('office:meta')
-            for i in elements[0].childNodes:
-                if i.tagName != 'meta:document-statistic':
-                    nodename = ''.join([k for k in i.nodeName.split(':')[1:]])
-                    metadata[nodename] = ''.join([j.data for j in i.childNodes])
-                else:
-                    # thank you w3c for not providing a nice
-                    # method to get all attributes from a node
-                    pass
-            zipin.close()
-        except KeyError:  # no meta.xml file found
-            logging.debug('%s has no opendocument metadata' % self.filename)
-        return metadata
-
-    def _remove_all(self):
-        '''
-            FIXME ?
-            There is a patch implementing the Zipfile.remove()
-            method here : http://bugs.python.org/issue6818
-        '''
-        zipin = zipfile.ZipFile(self.filename, 'r')
-        zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
-
-        for item in zipin.namelist():
-            name = os.path.join(self.tempdir, item)
-            _, ext = os.path.splitext(name)
-
-            if item.endswith('manifest.xml'):
-            # contain the list of all files present in the archive
-                zipin.extract(item, self.tempdir)
-                for line in fileinput.input(name, inplace=1):
-                    #remove the line which contains "meta.xml"
-                    line = line.strip()
-                    if not 'meta.xml' in line:
-                        print line
-                zipout.write(name, item)
-
-            elif ext in parser.NOMETA or item == 'mimetype':
-                #keep NOMETA files, and the "manifest" file
-                if item != 'meta.xml':  # contains the metadata
-                    zipin.extract(item, self.tempdir)
-                    zipout.write(name, item)
-
-            else:
-                zipin.extract(item, self.tempdir)
-                if os.path.isfile(name):
-                    try:
-                        cfile = mat.create_class_file(name, False,
-                            self.add2archive)
-                        cfile.remove_all()
-                        logging.debug('Processing %s from %s' % (item,
-                            self.filename))
-                        zipout.write(name, item)
-                    except:
-                        logging.info('%s\' fileformat is not supported' % item)
-                        if self.add2archive:
-                            zipout.write(name, item)
-        zipout.comment = ''
-        logging.info('%s treated' % self.filename)
-        zipin.close()
-        zipout.close()
-        self.do_backup()
-        return True
-
-    def is_clean(self):
-        '''
-            Check if the file is clean from harmful metadatas
-        '''
-        zipin = zipfile.ZipFile(self.filename, 'r')
-        try:
-            zipin.getinfo('meta.xml')
-        except KeyError:  # no meta.xml in the file
-            czf = archive.ZipStripper(self.filename, self.parser,
-                'application/zip', self.backup, self.add2archive)
-            if czf.is_clean():
-                zipin.close()
-                return True
-        zipin.close()
-        return False
-
-
-class PdfStripper(parser.GenericParser):
-    '''
-        Represent a PDF file
-    '''
-    def __init__(self, filename, parser, mime, backup, add2archive):
-        super(PdfStripper, self).__init__(filename, parser, mime, backup,
-            add2archive)
-        uri = 'file://' + os.path.abspath(self.filename)
-        self.password = None
-        self.document = poppler.document_new_from_file(uri, self.password)
-        self.meta_list = frozenset(['title', 'author', 'subject', 'keywords', 'creator',
-            'producer', 'metadata'])
-
-    def is_clean(self):
-        '''
-            Check if the file is clean from harmful metadatas
-        '''
-        for key in self.meta_list:
-            if self.document.get_property(key):
-                return False
-        return True
-
-    def remove_all(self):
-        '''
-            Remove metadata
-        '''
-        return self._remove_meta()
-
-    def _remove_meta(self):
-        '''
-            Opening the PDF with poppler, then doing a render
-            on a cairo pdfsurface for each pages.
-
-            http://cairographics.org/documentation/pycairo/2/
-            python-poppler is not documented at all : have fun ;)
-        '''
-        page = self.document.get_page(0)
-        # assume that every pages are the same size
-        page_width, page_height = page.get_size()
-        surface = cairo.PDFSurface(self.output, page_width, page_height)
-        context = cairo.Context(surface)  # context draws on the surface
-        logging.debug('PDF rendering of %s' % self.filename)
-        for pagenum in xrange(self.document.get_n_pages()):
-            page = self.document.get_page(pagenum)
-            context.translate(0, 0)
-            page.render_for_printing(context)  # render the page on context
-            context.show_page()  # draw context on surface
-        surface.finish()
-
-        try:
-            import pdfrw  # For now, poppler cannot write meta, so we must use pdfrw
-            logging.debug('Removing %s\'s superficial metadata' % self.filename)
-            trailer = pdfrw.PdfReader(self.output)
-            trailer.Info.Producer = None
-            trailer.Info.Creator = None
-            writer = pdfrw.PdfWriter()
-            writer.trailer = trailer
-            writer.write(self.output)
-            self.do_backup()
-            return True
-        except:
-            print('Unable to remove all metadata from %s, please install\
-pdfrw' % self.output)
-            return False
-        return True
-
-    def get_meta(self):
-        '''
-            Return a dict with all the meta of the file
-        '''
-        metadata = {}
-        for key in self.meta_list:
-            if self.document.get_property(key):
-                metadata[key] = self.document.get_property(key)
-        return metadata
-
-
-class OpenXmlStripper(archive.GenericArchiveStripper):
-    '''
-        Represent an office openxml document, which is like
-        an opendocument format, with some tricky stuff added.
-        It contains mostly xml, but can have media blobs, crap, ...
-        (I don't like this format.)
-    '''
-    def _remove_all(self):
-        '''
-            FIXME ?
-            There is a patch implementing the Zipfile.remove()
-            method here : http://bugs.python.org/issue6818
-        '''
-        zipin = zipfile.ZipFile(self.filename, 'r')
-        zipout = zipfile.ZipFile(self.output, 'w',
-            allowZip64=True)
-        for item in zipin.namelist():
-            name = os.path.join(self.tempdir, item)
-            _, ext = os.path.splitext(name)
-            if item.startswith('docProps/'):  # metadatas
-                pass
-            elif ext in parser.NOMETA or item == '.rels':
-                #keep parser.NOMETA files, and the file named ".rels"
-                zipin.extract(item, self.tempdir)
-                zipout.write(name, item)
-            else:
-                zipin.extract(item, self.tempdir)
-                if os.path.isfile(name):  # don't care about folders
-                    try:
-                        cfile = mat.create_class_file(name, False,
-                            self.add2archive)
-                        cfile.remove_all()
-                        logging.debug('Processing %s from %s' % (item,
-                            self.filename))
-                        zipout.write(name, item)
-                    except:
-                        logging.info('%s\' fileformat is not supported' % item)
-                        if self.add2archive:
-                            zipout.write(name, item)
-        zipout.comment = ''
-        logging.info('%s treated' % self.filename)
-        zipin.close()
-        zipout.close()
-        self.do_backup()
-        return True
-
-    def is_clean(self):
-        '''
-            Check if the file is clean from harmful metadatas
-        '''
-        zipin = zipfile.ZipFile(self.filename, 'r')
-        for item in zipin.namelist():
-            if item.startswith('docProps/'):
-                return False
-        zipin.close()
-        czf = archive.ZipStripper(self.filename, self.parser,
-                'application/zip', self.backup, self.add2archive)
-        return czf.is_clean()
-
-    def get_meta(self):
-        '''
-            Return a dict with all the meta of the file
-        '''
-        zipin = zipfile.ZipFile(self.filename, 'r')
-        metadata = {}
-        for item in zipin.namelist():
-            if item.startswith('docProps/'):
-                metadata[item] = 'harmful content'
-        zipin.close()
-        return metadata
-- 
cgit v1.3