diff options
| author | jvoisin | 2011-08-16 18:11:24 +0200 |
|---|---|---|
| committer | jvoisin | 2011-08-16 18:11:24 +0200 |
| commit | 4bd3e47da02fde08acfada1795cc55170abdb00a (patch) | |
| tree | f8c7aa5fd5e1b07a28b350c5ded8125ef2467c51 /lib/mat.py | |
| parent | baf8e080125614326ba9c96ca8f2404fd12b050e (diff) | |
setup.py now works !
Diffstat (limited to 'lib/mat.py')
| -rw-r--r-- | lib/mat.py | 162 |
1 files changed, 0 insertions, 162 deletions
diff --git a/lib/mat.py b/lib/mat.py deleted file mode 100644 index fd13287..0000000 --- a/lib/mat.py +++ /dev/null | |||
| @@ -1,162 +0,0 @@ | |||
| 1 | #!/usr/bin/env python | ||
| 2 | |||
| 3 | ''' | ||
| 4 | Metadata anonymisation toolkit library | ||
| 5 | ''' | ||
| 6 | |||
| 7 | import os | ||
| 8 | import subprocess | ||
| 9 | import logging | ||
| 10 | import mimetypes | ||
| 11 | import xml.sax | ||
| 12 | |||
| 13 | import hachoir_core.cmd_line | ||
| 14 | import hachoir_parser | ||
| 15 | |||
| 16 | import images | ||
| 17 | import audio | ||
| 18 | import office | ||
| 19 | import archive | ||
| 20 | import misc | ||
| 21 | |||
| 22 | __version__ = '0.1' | ||
| 23 | __author__ = 'jvoisin' | ||
| 24 | |||
| 25 | LOGGING_LEVEL = logging.DEBUG | ||
| 26 | |||
| 27 | logging.basicConfig(level=LOGGING_LEVEL) | ||
| 28 | |||
# Registry mapping a mime type to the stripper class that handles it.
# create_class_file() looks the detected mime type up in this table.
STRIPPERS = {
    'application/x-tar': archive.TarStripper,
    'application/x-gzip': archive.GzipStripper,
    'application/x-bzip2': archive.Bzip2Stripper,
    'application/zip': archive.ZipStripper,
    'audio/mpeg': audio.MpegAudioStripper,
    'image/jpeg': images.JpegStripper,
    'image/png': images.PngStripper,
    'application/x-bittorrent': misc.TorrentStripper,
    'application/opendocument': office.OpenDocumentStripper,
    'application/officeopenxml': office.OpenXmlStripper,
}

# PDF support is optional: the two imports below are only probes, the
# strippers are registered only when both modules can be imported.
try:
    import poppler
    import cairo
    STRIPPERS['application/x-pdf'] = office.PdfStripper
    STRIPPERS['application/pdf'] = office.PdfStripper
except ImportError:
    # Adjacent string literals instead of a backslash continuation inside
    # the literal, which would embed the next line's leading whitespace
    # in the printed message.
    print('Unable to import python-poppler and/or python-cairo: no pdf '
          'support')

# Same scheme for the audio formats that are registered only when
# mutagen is importable.
try:
    import mutagen
    STRIPPERS['audio/x-flac'] = audio.FlacStripper
    STRIPPERS['audio/vorbis'] = audio.OggStripper
except ImportError:
    print('unable to import python-mutagen : limited audio format support')
| 57 | |||
| 58 | |||
class XMLParser(xml.sax.handler.ContentHandler):
    '''
        SAX content handler for the supported-formats XML.

        Every element other than <format> is stored in a working dict
        as {element name: text content}; each time a <format> element
        closes, a copy of that dict is appended to self.list and the
        working dict is emptied.
    '''
    def __init__(self):
        # Explicit base-class call: works whether ContentHandler is an
        # old-style or a new-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self.dict = {}  # fields of the <format> section being read
        self.list = []  # one dict per completed <format> section
        self.content, self.key = '', ''
        self.between = False  # True while text should be accumulated

    def startElement(self, name, attrs):
        '''
            Called when entering an XML tag: remember its name and
            start accumulating its text from scratch
        '''
        self.between = True
        self.key = name
        self.content = ''

    def endElement(self, name):
        '''
            Called when exiting an XML tag
        '''
        if name == 'format':  # exiting a fileformat section
            self.list.append(self.dict.copy())
            self.dict.clear()
        else:
            # The two-character sequence backslash + 's' in the text is
            # turned into a space. A raw string is used because '\s' is
            # an invalid escape sequence in a normal string literal.
            content = self.content.replace(r'\s', ' ')
            self.dict[self.key] = content
            self.between = False

    def characters(self, characters):
        '''
            Concatenate the content between opening and closing tags
        '''
        if self.between is True:
            self.content += characters
| 97 | |||
def secure_remove(filename):
    '''
        Securely remove the file.

        The file is first handed to the external `shred --remove`
        command. If shred cannot be started or reports a failure, the
        file is removed with a plain os.remove() instead. Errors are
        logged and never raised; nothing is returned.
    '''
    shredded = False
    try:
        # Argument list without a shell: the filename is passed verbatim,
        # so spaces or shell metacharacters in it cannot alter the
        # command. '--' stops option parsing for names starting with '-'.
        status = subprocess.call(['shred', '--remove', '--', filename])
        shredded = (status == 0)
    except OSError:
        # shred is not installed or could not be executed
        status = None

    if shredded:
        return

    # subprocess.call() does not raise on a non-zero exit status, so the
    # status has to be checked explicitly before trusting the shredding.
    logging.error('Unable to securely remove %s' % filename)
    try:
        os.remove(filename)
    except OSError:
        logging.error('Unable to remove %s' % filename)
| 115 | |||
def is_secure(filename):
    '''
        Tell whether filename designates an existing regular file.

        Returns True when os.path.isfile() accepts the path; otherwise
        logs an error and returns False.

        NOTE: despite its name, this function performs no sanitising of
        the file name itself; it only checks that the file exists.
    '''
    if os.path.isfile(filename):
        return True

    logging.error('%s is not a valid file' % filename)
    return False
| 126 | |||
def create_class_file(name, backup, add2archive):
    '''
        Return a $FILETYPEStripper() instance corresponding to the
        filetype of the given file.

        name is the path of the file; backup and add2archive are passed
        unchanged to the stripper constructor.

        Returns None when the file does not pass is_secure(), cannot be
        parsed by hachoir, or has a mime type without a registered
        stripper.
    '''
    if not is_secure(name):
        return

    try:
        filename = hachoir_core.cmd_line.unicodeFilename(name)
    except TypeError:  # get rid of "decoding Unicode is not supported"
        filename = name

    parser = hachoir_parser.createParser(filename)
    if not parser:
        logging.info('Unable to parse %s' % filename)
        return

    mime = parser.mime_type

    if mime == 'application/zip':  # some formats are zipped stuff
        # The container alone does not identify the format: rely on the
        # file name to refine the type.
        mime = mimetypes.guess_type(name)[0]
        if mime is None:
            # guess_type() yields None for unknown or missing extensions;
            # report it as an unsupported format instead of failing on
            # None.startswith() below.
            logging.info('Don\'t have stripper for %s format' % mime)
            return

    if mime.startswith('application/vnd.oasis.opendocument'):
        mime = 'application/opendocument'  # opendocument fileformat
    elif mime.startswith('application/vnd.openxmlformats-officedocument'):
        mime = 'application/officeopenxml'  # office openxml

    try:
        stripper_class = STRIPPERS[mime]
    except KeyError:
        logging.info('Don\'t have stripper for %s format' % mime)
        return

    return stripper_class(filename, parser, mime, backup, add2archive)
