diff options
| author | jvoisin | 2011-07-23 18:16:37 +0200 |
|---|---|---|
| committer | jvoisin | 2011-07-23 18:16:37 +0200 |
| commit | 88665c9ffa82afd9689ed5bd211c7136853e809b (patch) | |
| tree | ed0d40e1defdaae40562aca1c9056f40c9f98858 /lib/misc.py | |
| parent | 8862480570367359ae50759d60bc9b4486868600 (diff) | |
Rename misc.py to office.py
Diffstat (limited to 'lib/misc.py')
| -rw-r--r-- | lib/misc.py | 139 |
1 files changed, 0 insertions, 139 deletions
diff --git a/lib/misc.py b/lib/misc.py deleted file mode 100644 index de38129..0000000 --- a/lib/misc.py +++ /dev/null | |||
| @@ -1,139 +0,0 @@ | |||
| 1 | import os | ||
| 2 | import mimetypes | ||
| 3 | import subprocess | ||
| 4 | import tempfile | ||
| 5 | import glob | ||
| 6 | |||
| 7 | import hachoir_core | ||
| 8 | |||
| 9 | import pdfrw | ||
| 10 | import mat | ||
| 11 | import parser | ||
| 12 | |||
class TorrentStripper(parser.Generic_parser):
    '''
        Strip the metadata fields of a torrent file.

        A torrent file looks like:
            -root
                -start
                -announce
                -announce-list
                -comment
                -created_by
                -creation_date
                -encoding
                -info
                -end
    '''
    # Names of the fields under 'root' that are treated as metadata.
    _HARMFUL_FIELDS = ('comment', 'created_by', 'creation_date', 'info')

    def remove_all(self):
        '''
            Delete every harmful field under 'root', then write the
            result to self.filename + parser.POSTFIX.
            When self.backup is False, the original file is securely
            removed and the cleaned copy takes its name.
        '''
        # Collect the names first: deleting from the editor while it is
        # being iterated could skip fields or break the iteration.
        harmful_names = [field.name for field in self.editor['root']
                         if self._should_remove(field)]
        for name in harmful_names:
            #FIXME : hachoir does not support torrent metadata editing :<
            del self.editor['/root/' + name]

        cleaned_path = self.filename + parser.POSTFIX
        hachoir_core.field.writeIntoFile(self.editor, cleaned_path)
        # NOTE(review): the rename is assumed to belong to the no-backup
        # branch (with a backup, the cleaned copy keeps its POSTFIX
        # name) -- confirm against the original indentation.
        if self.backup is False:
            mat.secure_remove(self.filename)  # remove the old file
            os.rename(cleaned_path, self.filename)

    def is_clean(self):
        '''
            Return True when no field under 'root' is considered harmful.
        '''
        for field in self.editor['root']:
            if self._should_remove(field):
                return False
        return True

    def get_meta(self):
        '''
            Return a dict mapping the name of each harmful field to its
            value, or to the placeholder 'harmful content' when the
            value cannot be read.
        '''
        metadata = {}
        for field in self.editor['root']:
            if not self._should_remove(field):
                continue
            try:  # FIXME
                metadata[field.name] = field.value
            except Exception:
                # Reading the value failed; still report the field.
                # (Exception, not a bare except, so that
                # KeyboardInterrupt/SystemExit are not swallowed.)
                metadata[field.name] = 'harmful content'
        return metadata

    def _should_remove(self, field):
        '''
            Return True if the name of the given field is one of the
            harmful ones.
        '''
        return field.name in self._HARMFUL_FIELDS
| 59 | |||
| 60 | |||
class PdfStripper(parser.Generic_parser):
    '''
        Represent a pdf file, with the help of pdfrw
    '''
    # Entries of the Info dictionary that remove_all() blanks.
    _INFO_FIELDS = ('Title', 'Author', 'Producer', 'Creator',
                    'CreationDate', 'ModDate')

    def __init__(self, filename, realname, backup):
        '''
            filename: path of the pdf that is read and rewritten
            realname: name used to build the output of remove_all_ugly
            backup: when False, the original file is replaced by the
                cleaned one
        '''
        self.filename = filename
        self.backup = backup
        self.realname = realname
        self.shortname = os.path.basename(filename)
        self.mime = mimetypes.guess_type(filename)[0]
        self.trailer = pdfrw.PdfReader(self.filename)
        self.writer = pdfrw.PdfWriter()
        # Converter command template; the two '%s' stand for the
        # input and the output of the conversion.
        self.convert = 'gm convert -antialias -enhance %s %s'

    def _convert_argv(self, sources, destination):
        '''
            Build the argument list of the converter from self.convert,
            with the given input files and output file in place of the
            '%s' placeholders.
        '''
        command = [token for token in self.convert.split()
                   if token != '%s']
        return command + list(sources) + [destination]

    def remove_all(self):
        '''
            Remove all the meta fields that are compromising, then
            write the result to self.filename + parser.POSTFIX.
            When self.backup is False, the original file is securely
            removed and the cleaned copy takes its name.
        '''
        info = self.trailer.Info
        # A pdf without an Info entry has nothing to blank; attribute
        # lookup on the trailer is expected to give None in that case.
        if info is not None:
            for name in self._INFO_FIELDS:
                setattr(info, name, '')

        cleaned_path = self.filename + parser.POSTFIX
        self.writer.trailer = self.trailer
        self.writer.write(cleaned_path)
        # NOTE(review): the rename is assumed to belong to the no-backup
        # branch, as in remove_all_ugly -- confirm against the original
        # indentation.
        if self.backup is False:
            mat.secure_remove(self.filename)  # remove the old file
            os.rename(cleaned_path, self.filename)

    def remove_all_ugly(self):
        '''
            Transform each page into a jpg, clean them,
            then re-assemble them into a new pdf.

            Raises OSError if the converter did not produce the new
            pdf; the original file is left untouched in that case.
        '''
        output_file = self.realname + parser.POSTFIX + '.pdf'
        # mkstemp is only used to obtain a unique path that serves as
        # a prefix for the page images; close its descriptor right
        # away so that it does not leak.
        handle, self.tmpdir = tempfile.mkstemp()
        os.close(handle)
        page_prefix = self.tmpdir + 'temp'

        try:
            # Argument lists (no shell) so that file names containing
            # spaces or shell metacharacters are passed through intact.
            subprocess.call(self._convert_argv([self.filename],
                page_prefix + '.jpg'))  # Convert pages to jpg

            for current_file in glob.glob(page_prefix + '*'):
                # Clean every jpg image
                class_file = mat.create_class_file(current_file, False)
                class_file.remove_all()

            # The shell used to expand 'temp.jpg*'; expand it here
            # instead, sorted like a shell glob would be.
            pages = sorted(glob.glob(page_prefix + '.jpg*'))
            if pages:
                subprocess.call(self._convert_argv(pages,
                    output_file))  # Assemble jpg into pdf
        finally:
            for current_file in glob.glob(page_prefix + '*'):
                # remove jpg files
                mat.secure_remove(current_file)
            if os.path.exists(self.tmpdir):
                os.remove(self.tmpdir)  # placeholder made by mkstemp

        if not os.path.isfile(output_file):
            # Never destroy the original when there is no replacement.
            raise OSError('conversion of %s failed' % self.filename)

        if self.backup is False:
            mat.secure_remove(self.filename)  # remove the old file
            os.rename(output_file, self.filename)  # rename the new
            name = self.realname
        else:
            name = output_file
        # The pdf produced by the converter is cleaned as well.
        class_file = mat.create_class_file(name, False)
        class_file.remove_all()

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadata:
            every value of the Info dictionary must be empty.
        '''
        info = self.trailer.Info
        if info is None:
            return True
        for _, value in info.iteritems():
            # Compare the values, not the keys: a key is never ''.
            # '()' is an empty pdf literal string, i.e. what get_meta
            # would report as ''.
            if value not in ('', '()'):
                return False
        return True

    def get_meta(self):
        '''
            Return a dict with all the meta of the file.
            The first character of each key and the first and last
            characters of each value are dropped.
        '''
        metadata = {}
        info = self.trailer.Info
        if info is None:
            return metadata
        for key, value in info.iteritems():
            metadata[key[1:]] = value[1:-1]
        return metadata
