diff options
| -rw-r--r-- | FORMATS | 86 | ||||
| -rw-r--r-- | lib/mat.py | 42 |
2 files changed, 126 insertions, 2 deletions
| @@ -0,0 +1,86 @@ | |||
| 1 | <xml> | ||
| 2 | <format> | ||
| 3 | <name>Portable Network Graphics</name> | ||
| 4 | <extension>.png</extension> | ||
| 5 | <support>full</support> | ||
| 6 | <metadata>textual metadata + date</metadata> | ||
| 7 | <method>removal of harmful fields is done with hachoir</method> | ||
| 8 | </format> | ||
| 9 | |||
| 10 | <format> | ||
| 11 | <name>Jpeg</name> | ||
| 12 | <extension>.jpeg, .jpg</extension> | ||
| 13 | <support>full</support> | ||
| 14 | <metadata>comment + exif/photoshop/adobe</metadata> | ||
| 15 | <method>removal of harmful fields is done with hachoir</method> | ||
| 16 | </format> | ||
| 17 | |||
| 18 | <format> | ||
| 19 | <name>Open Document</name> | ||
| 20 | <extension>.odt, .odx, .ods, ...</extension> | ||
| 21 | <support>full</support> | ||
| 22 | <metadata>a meta.xml file</metadata> | ||
| 23 | <method>removal of the meta.xml file</method> | ||
| 24 | </format> | ||
| 25 | |||
| 26 | <format> | ||
| 27 | <name>Portable Document Format</name> | ||
| 28 | <extension>.pdf</extension> | ||
| 29 | <support>full</support> | ||
| 30 | <metadata>a lot</metadata> | ||
| 31 | <method>rendering of the pdf file on a cairo surface with the help of | ||
| 32 | poppler in order to remove all the internal metadata, | ||
| 33 | then removal of the remaining metadata fields of the pdf itself with | ||
| 34 | pdfrw (the next version of python-cairo will support metadata, | ||
| 35 | so we should get rid of pdfrw)</method> | ||
| 36 | </format> | ||
| 37 | |||
| 38 | <format> | ||
| 39 | <name>Tape ARchive</name> | ||
| 40 | <extension>.tar, .tar.bz2, .tar.gz</extension> | ||
| 41 | <support>full</support> | ||
| 42 | <metadata>metadata from the file itself, metadata from the files contained | ||
| 43 | in the archive, and metadata added by tar to the file at the | ||
| 44 | creation of the archive</metadata> | ||
| 45 | <method>extraction of each file, treatment of the file, addition of the treated file | ||
| 46 | to a new archive; right before the addition, removal of the metadata added by tar | ||
| 47 | itself. When the new archive is complete, removal of all its metadata.</method> | ||
| 48 | </format> | ||
| 49 | |||
| 50 | <format> | ||
| 51 | <name>Zip</name> | ||
| 52 | <extension>.zip</extension> | ||
| 53 | <support>partial</support> | ||
| 54 | <metadata>metadata from the file itself, metadata from the files contained | ||
| 55 | in the archive, and metadata added by zip to the file when added to | ||
| 56 | the archive. | ||
| 57 | </metadata> | ||
| 58 | <method>extraction of each file, treatment of the file, addition of the treated file | ||
| 59 | to a new archive. When the new archive is complete, removal of all its metadata</method> | ||
| 60 | <remaining>metadata added by zip itself to internal files</remaining> | ||
| 61 | </format> | ||
| 62 | |||
| 63 | <format> | ||
| 64 | <name>MPEG Audio</name> | ||
| 65 | <extension>.mp3, .mp2, .mp1</extension> | ||
| 66 | <support>full</support> | ||
| 67 | <metadata>id3</metadata> | ||
| 68 | <method>removal of harmful fields is done with hachoir</method> | ||
| 69 | </format> | ||
| 70 | |||
| 71 | <format> | ||
| 72 | <name>Ogg Vorbis</name> | ||
| 73 | <extension>.ogg</extension> | ||
| 74 | <support>full</support> | ||
| 75 | <metadata>Vorbis</metadata> | ||
| 76 | <method>removal of harmful fields is done with mutagen</method> | ||
| 77 | </format> | ||
| 78 | |||
| 79 | <format> | ||
| 80 | <name>Free Lossless Audio Codec</name> | ||
| 81 | <extension>.flac</extension> | ||
| 82 | <support>full</support> | ||
| 83 | <metadata>Flac, Vorbis</metadata> | ||
| 84 | <method>removal of harmful fields is done with mutagen</method> | ||
| 85 | </format> | ||
| 86 | </xml> | ||
| @@ -7,6 +7,7 @@ | |||
| 7 | import os | 7 | import os |
| 8 | import subprocess | 8 | import subprocess |
| 9 | import logging | 9 | import logging |
| 10 | import xml.sax | ||
| 10 | 11 | ||
| 11 | import hachoir_core.cmd_line | 12 | import hachoir_core.cmd_line |
| 12 | import hachoir_parser | 13 | import hachoir_parser |
| @@ -45,13 +46,50 @@ except ImportError: | |||
| 45 | try: | 46 | try: |
| 46 | import mutagen | 47 | import mutagen |
| 47 | STRIPPERS['audio/x-flac'] = audio.FlacStripper | 48 | STRIPPERS['audio/x-flac'] = audio.FlacStripper |
| 48 | STRIPPERS['audio/x-ape'] = audio.Apev2Stripper | ||
| 49 | STRIPPERS['audio/x-wavpack'] = audio.Apev2Stripper | ||
| 50 | STRIPPERS['audio/vorbis'] = audio.OggStripper | 49 | STRIPPERS['audio/vorbis'] = audio.OggStripper |
| 51 | except ImportError: | 50 | except ImportError: |
| 52 | print('unable to import python-mutagen : limited audio format support') | 51 | print('unable to import python-mutagen : limited audio format support') |
| 53 | 52 | ||
| 54 | 53 | ||
class XMLParser(xml.sax.handler.ContentHandler):
    '''
        Parse the supported-formats xml, and build a corresponding
        list of dict.

        After parsing, self.list holds one dict per <format> element,
        mapping each child tag name to its text content (with newlines
        replaced by spaces).
    '''
    def __init__(self):
        # Explicit base-class call (works for old- and new-style classes):
        # it initialises the handler state (_locator) owned by ContentHandler.
        xml.sax.handler.ContentHandler.__init__(self)
        self.dict = {}  # fields of the <format> section being parsed
        self.list = []  # one dict per completed <format> section
        self.content, self.key = '', ''
        self.between = False  # True while inside a just-opened tag

    def startElement(self, name, attrs):
        '''
            Called when entering an xml tag: remember its name and
            start accumulating its text content from scratch.
        '''
        self.between = True
        self.key = name
        self.content = ''

    def endElement(self, name):
        '''
            Called when exiting an xml tag.

            Closing a <format> pushes a copy of the collected fields to
            self.list; closing a leaf tag stores its text content.
        '''
        if name == 'format':  # exiting a fileformat section
            self.list.append(self.dict.copy())
            self.dict.clear()
        elif self.between:
            # Only a tag closed right after being opened is a leaf.
            # Without this guard, closing the root element would
            # re-insert the stale content of the last leaf.
            self.dict[self.key] = self.content.replace('\n', ' ')
        self.between = False

    def characters(self, characters):
        '''
            Concatenate the content between opening and closing tags
            (the parser may deliver it in several chunks).
        '''
        if self.between is True:
            self.content += characters
| 91 | |||
| 92 | |||
| 55 | def secure_remove(filename): | 93 | def secure_remove(filename): |
| 56 | ''' | 94 | ''' |
| 57 | securely remove the file | 95 | securely remove the file |
