summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: jvoisin, 2011-07-26 15:14:48 +0200
committer: jvoisin, 2011-07-26 15:14:48 +0200
commit: 962e9aec5ffcdaae39e06f277dd47d1943205c37 (patch)
tree: 85d57c245f010579dc3eb9199fe3ed6e379e96d8 /lib
parent: 7c9edd6514854f707b87e150a1ffa327ebd8dcac (diff)
Bugfixes (especially for pdf), and more pylint conformity
Diffstat (limited to 'lib')
-rw-r--r-- lib/archive.py 40
-rw-r--r-- lib/audio.py 2
-rw-r--r-- lib/images.py 10
-rw-r--r-- lib/mat.py 4
-rw-r--r-- lib/misc.py 5
-rw-r--r-- lib/office.py 23
-rw-r--r-- lib/parser.py 4
7 files changed, 63 insertions, 25 deletions
diff --git a/lib/archive.py b/lib/archive.py
index f11506a..1aaf74b 100644
--- a/lib/archive.py
+++ b/lib/archive.py
@@ -10,7 +10,7 @@ import parser
10import mat 10import mat
11 11
12 12
13class GenericArchiveStripper(parser.Generic_parser): 13class GenericArchiveStripper(parser.GenericParser):
14 ''' 14 '''
15 Represent a generic archive 15 Represent a generic archive
16 ''' 16 '''
@@ -29,24 +29,40 @@ class GenericArchiveStripper(parser.Generic_parser):
29 shutil.rmtree(self.tempdir) 29 shutil.rmtree(self.tempdir)
30 30
31 def remove_all(self): 31 def remove_all(self):
32 '''
33 Call _remove_all() with in argument : "normal"
34 '''
32 self._remove_all('normal') 35 self._remove_all('normal')
33 36
34 def remove_all_ugly(self): 37 def remove_all_ugly(self):
38 '''
39 call remove_all() with in argument : "ugly"
40 '''
35 self._remove_all('ugly') 41 self._remove_all('ugly')
36 42
43 def _remove_all(self, method):
44 '''
45 Remove all meta, normal way if method is "normal",
46 else, use the ugly way (with possible data loss)
47 '''
48 raise NotImplementedError
37 49
38class ZipStripper(GenericArchiveStripper): 50class ZipStripper(GenericArchiveStripper):
39 ''' 51 '''
40 Represent a zip file 52 Represent a zip file
41 ''' 53 '''
42 def is_file_clean(self, file): 54 def is_file_clean(self, fileinfo):
43 if file.comment is not '': 55 '''
56 Check if a ZipInfo object is clean of metadatas added
57 by zip itself, independently of the corresponding file metadatas
58 '''
59 if fileinfo.comment is not '':
44 return False 60 return False
45 elif file.date_time is not 0: 61 elif fileinfo.date_time is not 0:
46 return False 62 return False
47 elif file.create_system is not 0: 63 elif fileinfo.create_system is not 0:
48 return False 64 return False
49 elif file.create_version is not 0: 65 elif fileinfo.create_version is not 0:
50 return False 66 return False
51 else: 67 else:
52 return True 68 return True
@@ -74,7 +90,7 @@ class ZipStripper(GenericArchiveStripper):
74 #best solution I have found 90 #best solution I have found
75 logging.info('%s\'s fileformat is not supported, or is a \ 91 logging.info('%s\'s fileformat is not supported, or is a \
76harmless format' % item.filename) 92harmless format' % item.filename)
77 base, ext = os.path.splitext(name) 93 _, ext = os.path.splitext(name)
78 bname = os.path.basename(item.filename) 94 bname = os.path.basename(item.filename)
79 if ext not in parser.NOMETA: 95 if ext not in parser.NOMETA:
80 if bname != 'mimetype': 96 if bname != 'mimetype':
@@ -84,6 +100,10 @@ harmless format' % item.filename)
84 return True 100 return True
85 101
86 def get_meta(self): 102 def get_meta(self):
103 '''
104 Return all the metadata of a ZipFile (don't return metadatas
105 of contained files : should it ?)
106 '''
87 zipin = zipfile.ZipFile(self.filename, 'r') 107 zipin = zipfile.ZipFile(self.filename, 'r')
88 metadata = {} 108 metadata = {}
89 for field in zipin.infolist(): 109 for field in zipin.infolist():
@@ -231,6 +251,9 @@ class TarStripper(GenericArchiveStripper):
231 251
232 252
233class GzipStripper(TarStripper): 253class GzipStripper(TarStripper):
254 '''
255 Represent a tar.gz archive
256 '''
234 def __init__(self, realname, filename, parser, editor, backup, 257 def __init__(self, realname, filename, parser, editor, backup,
235 add2archive): 258 add2archive):
236 super(GzipStripper, self).__init__(realname, 259 super(GzipStripper, self).__init__(realname,
@@ -239,6 +262,9 @@ class GzipStripper(TarStripper):
239 262
240 263
241class Bzip2Stripper(TarStripper): 264class Bzip2Stripper(TarStripper):
265 '''
266 Represents a tar.bz2 archive
267 '''
242 def __init__(self, realname, filename, parser, editor, backup, 268 def __init__(self, realname, filename, parser, editor, backup,
243 add2archive): 269 add2archive):
244 super(Bzip2Stripper, self).__init__(realname, 270 super(Bzip2Stripper, self).__init__(realname,
diff --git a/lib/audio.py b/lib/audio.py
index 35d4fde..d77efd9 100644
--- a/lib/audio.py
+++ b/lib/audio.py
@@ -1,7 +1,7 @@
1import parser 1import parser
2 2
3 3
4class MpegAudioStripper(parser.Generic_parser): 4class MpegAudioStripper(parser.GenericParser):
5 ''' 5 '''
6 mpeg audio file (mp3, ...) 6 mpeg audio file (mp3, ...)
7 ''' 7 '''
diff --git a/lib/images.py b/lib/images.py
index bab0bfb..df3d256 100644
--- a/lib/images.py
+++ b/lib/images.py
@@ -1,7 +1,10 @@
1import parser 1import parser
2 2
3 3
4class JpegStripper(parser.Generic_parser): 4class JpegStripper(parser.GenericParser):
5 '''
6 Represents a .jpeg file
7 '''
5 def _should_remove(self, field): 8 def _should_remove(self, field):
6 if field.name.startswith('comment'): 9 if field.name.startswith('comment'):
7 return True 10 return True
@@ -11,7 +14,10 @@ class JpegStripper(parser.Generic_parser):
11 return False 14 return False
12 15
13 16
14class PngStripper(parser.Generic_parser): 17class PngStripper(parser.GenericParser):
18 '''
19 Represents a .png file
20 '''
15 def _should_remove(self, field): 21 def _should_remove(self, field):
16 if field.name.startswith("text["): 22 if field.name.startswith("text["):
17 return True 23 return True
diff --git a/lib/mat.py b/lib/mat.py
index e4371ce..8d01e05 100644
--- a/lib/mat.py
+++ b/lib/mat.py
@@ -25,7 +25,7 @@ LOGGING_LEVEL = logging.DEBUG
25 25
26logging.basicConfig(level=LOGGING_LEVEL) 26logging.basicConfig(level=LOGGING_LEVEL)
27 27
28strippers = { 28STRIPPERS = {
29 hachoir_parser.image.JpegFile: images.JpegStripper, 29 hachoir_parser.image.JpegFile: images.JpegStripper,
30 hachoir_parser.image.PngFile: images.PngStripper, 30 hachoir_parser.image.PngFile: images.PngStripper,
31 hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper, 31 hachoir_parser.audio.MpegAudioFile: audio.MpegAudioStripper,
@@ -83,7 +83,7 @@ def create_class_file(name, backup, add2archive):
83 (which herits from the "file" class), based on the editor 83 (which herits from the "file" class), based on the editor
84 of given file (name) 84 of given file (name)
85 ''' 85 '''
86 stripper_class = strippers[editor.input.__class__] 86 stripper_class = STRIPPERS[editor.input.__class__]
87 except KeyError: 87 except KeyError:
88 #Place for another lib than hachoir 88 #Place for another lib than hachoir
89 logging.info('Don\'t have stripper for format %s' % editor.description) 89 logging.info('Don\'t have stripper for format %s' % editor.description)
diff --git a/lib/misc.py b/lib/misc.py
index ce14313..f846388 100644
--- a/lib/misc.py
+++ b/lib/misc.py
@@ -2,7 +2,7 @@ import hachoir_core
2import parser 2import parser
3 3
4 4
5class TorrentStripper(parser.Generic_parser): 5class TorrentStripper(parser.GenericParser):
6 ''' 6 '''
7 A torrent file looks like: 7 A torrent file looks like:
8 -root 8 -root
@@ -21,8 +21,7 @@ class TorrentStripper(parser.Generic_parser):
21 if self._should_remove(field): 21 if self._should_remove(field):
22 #FIXME : hachoir does not support torrent metadata editing :< 22 #FIXME : hachoir does not support torrent metadata editing :<
23 del self.editor['/root/' + field.name] 23 del self.editor['/root/' + field.name]
24 hachoir_core.field.writeIntoFile(self.editor, 24 hachoir_core.field.writeIntoFile(self.editor, self.output)
25 self.filename + parser.POSTFIX)
26 self.do_backup() 25 self.do_backup()
27 26
28 def is_clean(self): 27 def is_clean(self):
diff --git a/lib/office.py b/lib/office.py
index 432bc0b..5fa475d 100644
--- a/lib/office.py
+++ b/lib/office.py
@@ -6,6 +6,7 @@ import glob
6import logging 6import logging
7import zipfile 7import zipfile
8import re 8import re
9import shutil
9from xml.etree import ElementTree 10from xml.etree import ElementTree
10 11
11 12
@@ -97,7 +98,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
97 return True 98 return True
98 99
99 100
100class PdfStripper(parser.Generic_parser): 101class PdfStripper(parser.GenericParser):
101 ''' 102 '''
102 Represent a pdf file, with the help of pdfrw 103 Represent a pdf file, with the help of pdfrw
103 ''' 104 '''
@@ -109,10 +110,17 @@ class PdfStripper(parser.Generic_parser):
109 self.realname = realname 110 self.realname = realname
110 self.shortname = os.path.basename(filename) 111 self.shortname = os.path.basename(filename)
111 self.mime = mimetypes.guess_type(filename)[0] 112 self.mime = mimetypes.guess_type(filename)[0]
113 self.tempdir = tempfile.mkdtemp()
112 self.trailer = pdfrw.PdfReader(self.filename) 114 self.trailer = pdfrw.PdfReader(self.filename)
113 self.writer = pdfrw.PdfWriter() 115 self.writer = pdfrw.PdfWriter()
114 self.convert = 'gm convert -antialias -enhance %s %s' 116 self.convert = 'gm convert -antialias -enhance %s %s'
115 117
118 def __del__(self):
119 '''
120 Remove the temp dir
121 '''
122 shutil.rmtree(self.tempdir)
123
116 def remove_all(self): 124 def remove_all(self):
117 ''' 125 '''
118 Remove all the meta fields that are compromizing 126 Remove all the meta fields that are compromizing
@@ -133,19 +141,18 @@ class PdfStripper(parser.Generic_parser):
133 Transform each pages into a jpg, clean them, 141 Transform each pages into a jpg, clean them,
134 then re-assemble them into a new pdf 142 then re-assemble them into a new pdf
135 ''' 143 '''
136 _, self.tmpdir = tempfile.mkstemp() 144 subprocess.call(self.convert % (self.filename, self.tempdir +
137 subprocess.call(self.convert % (self.filename, self.tmpdir +
138 'temp.jpg'), shell=True) # Convert pages to jpg 145 'temp.jpg'), shell=True) # Convert pages to jpg
139 146
140 for current_file in glob.glob(self.tmpdir + 'temp*'): 147 for current_file in glob.glob(self.tempdir + 'temp*'):
141 #Clean every jpg image 148 #Clean every jpg image
142 class_file = mat.create_class_file(current_file, False) 149 class_file = mat.create_class_file(current_file, False, False)
143 class_file.remove_all() 150 class_file.remove_all()
144 151
145 subprocess.call(self.convert % (self.tmpdir + 152 subprocess.call(self.convert % (self.tempdir +
146 'temp.jpg*', self.output), shell=True) # Assemble jpg into pdf 153 'temp.jpg*', self.output), shell=True) # Assemble jpg into pdf
147 154
148 for current_file in glob.glob(self.tmpdir + 'temp*'): 155 for current_file in glob.glob(self.tempdir + 'temp*'):
149 #remove jpg files 156 #remove jpg files
150 mat.secure_remove(current_file) 157 mat.secure_remove(current_file)
151 158
@@ -155,7 +162,7 @@ class PdfStripper(parser.Generic_parser):
155 name = self.realname 162 name = self.realname
156 else: 163 else:
157 name = self.output 164 name = self.output
158 class_file = mat.create_class_file(name, False) 165 class_file = mat.create_class_file(name, False, False)
159 class_file.remove_all() 166 class_file.remove_all()
160 167
161 def is_clean(self): 168 def is_clean(self):
diff --git a/lib/parser.py b/lib/parser.py
index 28e0849..ae647fe 100644
--- a/lib/parser.py
+++ b/lib/parser.py
@@ -12,7 +12,7 @@ import mat
12NOMETA = ('.bmp', 'html', '.py', '.rdf', '.txt', '.xml') 12NOMETA = ('.bmp', 'html', '.py', '.rdf', '.txt', '.xml')
13 13
14 14
15class Generic_parser(object): 15class GenericParser(object):
16 def __init__(self, realname, filename, parser, editor, backup, 16 def __init__(self, realname, filename, parser, editor, backup,
17 add2archive): 17 add2archive):
18 basename, ext = os.path.splitext(filename) 18 basename, ext = os.path.splitext(filename)
@@ -78,7 +78,7 @@ class Generic_parser(object):
78 return True if the field is compromizing 78 return True if the field is compromizing
79 abstract method 79 abstract method
80 ''' 80 '''
81 raise NotImplementedError() 81 raise NotImplementedError
82 82
83 def do_backup(self): 83 def do_backup(self):
84 ''' 84 '''