summaryrefslogtreecommitdiff
path: root/lib/misc.py
diff options
context:
space:
mode:
author jvoisin 2011-07-23 18:16:37 +0200
committer jvoisin 2011-07-23 18:16:37 +0200
commit 88665c9ffa82afd9689ed5bd211c7136853e809b (patch)
tree ed0d40e1defdaae40562aca1c9056f40c9f98858 /lib/misc.py
parent 8862480570367359ae50759d60bc9b4486868600 (diff)
Rename misc.py to office.py
Diffstat (limited to 'lib/misc.py')
-rw-r--r-- lib/misc.py 139
1 files changed, 0 insertions, 139 deletions
diff --git a/lib/misc.py b/lib/misc.py
deleted file mode 100644
index de38129..0000000
--- a/lib/misc.py
+++ /dev/null
@@ -1,139 +0,0 @@
1import os
2import mimetypes
3import subprocess
4import tempfile
5import glob
6
7import hachoir_core
8
9import pdfrw
10import mat
11import parser
12
class TorrentStripper(parser.Generic_parser):
    '''
        Strip the harmful fields out of a torrent file.

        The methods below rely on self.editor, self.filename and
        self.backup; they are not set in this class, so they are
        presumably provided by parser.Generic_parser (to be confirmed
        against that class).

        A torrent file looks like:
        -root
            -start
            -announce
            -announce-list
            -comment
            -created_by
            -creation_date
            -encoding
            -info
            -end
    '''
    # Names of the fields directly under 'root' that are considered harmful.
    _HARMFUL_FIELDS = ('comment', 'created_by', 'creation_date', 'info')

    def remove_all(self):
        '''
            Delete every harmful field, write the result to
            self.filename + parser.POSTFIX and, when self.backup is
            False, securely erase the original and move the new file
            in its place.
        '''
        # Collect the names first, so that the field set is never
        # modified while it is being iterated.
        harmful = [field.name for field in self.editor['root']
                   if self._should_remove(field)]
        for name in harmful:
            #FIXME : hachoir does not support torrent metadata editing :<
            del self.editor['/root/' + name]

        cleaned = self.filename + parser.POSTFIX
        hachoir_core.field.writeIntoFile(self.editor, cleaned)
        if self.backup is False:
            mat.secure_remove(self.filename)  # remove the old file
            os.rename(cleaned, self.filename)

    def is_clean(self):
        '''
            Return True when no field under 'root' is harmful,
            False otherwise.
        '''
        for field in self.editor['root']:
            if self._should_remove(field):
                return False
        return True

    def get_meta(self):
        '''
            Return a dict mapping the name of each harmful field to its
            value, or to the string 'harmful content' when the value
            cannot be read.
        '''
        metadata = {}
        for field in self.editor['root']:
            if not self._should_remove(field):
                continue
            try:  #FIXME
                metadata[field.name] = field.value
            except Exception:
                # The field is still reported even though its value
                # could not be read. Exception (rather than a bare
                # except) lets KeyboardInterrupt/SystemExit through.
                metadata[field.name] = 'harmful content'
        return metadata

    def _should_remove(self, field):
        '''
            Return True if the name of `field` is one of the harmful ones.
        '''
        return field.name in self._HARMFUL_FIELDS
59
60
class PdfStripper(parser.Generic_parser):
    '''
        Represent a pdf file, with the help of pdfrw
    '''
    # Entries of the Info dictionary that remove_all() blanks.
    _INFO_FIELDS = ('Title', 'Author', 'Producer', 'Creator',
                    'CreationDate', 'ModDate')

    def __init__(self, filename, realname, backup):
        '''
            filename: path of the pdf to process
            realname: name used to build the output of remove_all_ugly()
            backup: when False, the original file is replaced by the
                    cleaned one instead of being kept

            Note: the initializer of the base class is not called here.
        '''
        self.filename = filename
        self.backup = backup
        self.realname = realname
        self.shortname = os.path.basename(filename)
        self.mime = mimetypes.guess_type(filename)[0]
        self.trailer = pdfrw.PdfReader(self.filename)
        self.writer = pdfrw.PdfWriter()
        # Command template: the two '%s' stand for the input and the output.
        self.convert = 'gm convert -antialias -enhance %s %s'

    def _convert(self, sources, destination):
        '''
            Run the self.convert command on the `sources` paths, writing
            to `destination`.

            The command is passed as an argument list (no shell), so
            file names containing spaces or shell metacharacters are
            handled literally. Raises subprocess.CalledProcessError if
            the command exits with a non-zero status, and OSError if it
            cannot be started.
        '''
        # Keep the program and its options, drop the '%s' placeholders.
        command = [word for word in self.convert.split() if word != '%s']
        subprocess.check_call(command + list(sources) + [destination])

    def remove_all(self):
        '''
            Remove all the meta fields that are compromising: blank
            them, write the result to self.filename + parser.POSTFIX
            and, when self.backup is False, securely erase the original
            and move the new file in its place.
        '''
        info = self.trailer.Info
        # A pdf is not required to carry an Info dictionary; in that
        # case there is nothing to blank, but the file is still written.
        if info is not None:
            for name in self._INFO_FIELDS:
                setattr(info, name, '')

        cleaned = self.filename + parser.POSTFIX
        self.writer.trailer = self.trailer
        self.writer.write(cleaned)
        if self.backup is False:
            mat.secure_remove(self.filename)  # remove the old file
            os.rename(cleaned, self.filename)

    def remove_all_ugly(self):
        '''
            Transform each page into a jpg, clean them,
            then re-assemble them into a new pdf.

            The page images live in a private temporary directory that
            is emptied and removed even when a step fails. The original
            file is only erased once the new pdf has been produced: a
            failing conversion raises (see _convert) before anything is
            removed.
        '''
        output_file = self.realname + parser.POSTFIX + '.pdf'
        # mkdtemp creates a directory only its owner can access and
        # does not leave an open file descriptor behind.
        self.tmpdir = tempfile.mkdtemp()
        page_prefix = os.path.join(self.tmpdir, 'temp')
        try:
            # Convert pages to jpg
            self._convert([self.filename], page_prefix + '.jpg')

            for current_file in glob.glob(page_prefix + '*'):
                # Clean every jpg image
                class_file = mat.create_class_file(current_file, False)
                class_file.remove_all()

            # Assemble jpg into pdf; the pattern is expanded here since
            # no shell is involved, sorted as a shell would do.
            pages = sorted(glob.glob(page_prefix + '.jpg*'))
            self._convert(pages, output_file)
        finally:
            for current_file in glob.glob(page_prefix + '*'):
                # remove jpg files
                mat.secure_remove(current_file)
            try:
                os.rmdir(self.tmpdir)
            except OSError:
                # Best effort: a leftover directory must not hide the
                # error (if any) that brought us here.
                pass

        if self.backup is False:
            mat.secure_remove(self.filename)  # remove the old file
            os.rename(output_file, self.filename)  # rename the new
            name = self.realname
        else:
            name = output_file
        class_file = mat.create_class_file(name, False)
        class_file.remove_all()

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas: return
            True when there is no Info dictionary or when every one of
            its values is the empty string, False otherwise.
        '''
        info = self.trailer.Info
        if info is None:
            return True
        # Compare the values: iterating the dictionary itself would
        # yield its keys, which are never empty.
        for _, value in info.iteritems():
            if value != '':
                return False
        return True

    def get_meta(self):
        '''
            Return a dict with all the meta of the file: each key of
            the Info dictionary without its first character, mapped to
            its value without its first and last characters. The dict
            is empty when there is no Info dictionary.
        '''
        metadata = {}
        info = self.trailer.Info
        if info is None:
            return metadata
        for key, value in info.iteritems():
            metadata[key[1:]] = value[1:-1]
        return metadata