From 5027bba6c2c1c7546cfc0eb953de38181b7e4e7b Mon Sep 17 00:00:00 2001 From: jvoisin Date: Mon, 7 Jan 2013 22:37:54 +0100 Subject: Refactor the get_sharedir function, and move datas to a proper data folder --- FORMATS | 117 ----------------------------------------------------------- MAT/FORMATS | 43 +++++++++++++++++----- MAT/mat.py | 28 +++++++------- data/FORMATS | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ data/mat.png | Bin 0 -> 31340 bytes mat | 2 +- mat-gui | 4 +- mat.png | Bin 31340 -> 0 bytes setup.py | 4 +- 9 files changed, 170 insertions(+), 145 deletions(-) delete mode 100644 FORMATS create mode 100644 data/FORMATS create mode 100644 data/mat.png delete mode 100644 mat.png diff --git a/FORMATS b/FORMATS deleted file mode 100644 index 3f0751c..0000000 --- a/FORMATS +++ /dev/null @@ -1,117 +0,0 @@ - - - Portable Network Graphics - .png - image/png - full - textual metadata + date - removal of harmful fields is done with hachoir - - - - Jpeg - .jpeg, .jpg - image/jpeg - partial - comment + exif/photoshop/adobe - removal of harmful fields is done with hachoir - Canon Raw tags : - http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/CanonRaw.html - - - - - Open Document - .odt, .odx, .ods, ... - application/opendocument - full - a meta.xml file - removal of the meta.xml file - - - - Office Openxml - .docx, .pptx, .xlsx, ... - application/officeopenxml - full - a docProps folder containings xml metadata files - removal of the docProps folder - - - - Portable Document Fileformat - .pdf - application/pdf - full - a lot - rendering of the PDF file on a cairo surface with the help of - poppler in order to remove all the internal metadata. - For now, cairo create some metadata. - They can be remove if you install either exiftool, or python-pdfrw. - The next version of python-cairo will support PDF metadata. - - - - - Tape ARchive - .tar, .tar.bz2, .tar.gz - application/x-tar, application/x-gzip, application/x-bzip2 - full - metadata from the file itself, metadata from the file contained - into the archive, and metadata added by tar to the file at then - creation of the archive - extraction of each file, treatement of the file, add treated file - to a new archive, right before the add, remove the metadata added by tar - itself. When the new archive is complete, remove all his metadata. - - - - Zip - .zip - application/zip - partial - metadata from the file itself, metadata from the file contained - into the archive, and metadata added by zip to the file when added to - the archive. - extraction of each file, treatement of the file, add treated file - to a new archive. When the new archive is complete, remove all his metadata - metadata added by zip itself to internal files - - - - MPEG Audio - .mp3, .mp2, .mp1, .mpa - audio/mpeg - full - id3 - removal of harmful fields is done with hachoir - - - - Ogg Vorbis - .ogg - audio/vorbis - full - Vorbis - removal of harmful fields is done with mutagen - - - - Free Lossless Audio Codec - .flac - audio/x-flac - full - Flac, Vorbis - removal of harmful fields is done with mutagen - - - - Torrent - .torrent - application/torrent - full - torrent - remove all the compromizing metadata with a heavily tuned version - of the bencode lib by Petru Paled - - diff --git a/MAT/FORMATS b/MAT/FORMATS index c497524..3f0751c 100644 --- a/MAT/FORMATS +++ b/MAT/FORMATS @@ -2,6 +2,7 @@ Portable Network Graphics .png + image/png full textual metadata + date removal of harmful fields is done with hachoir @@ -10,14 +11,19 @@ Jpeg .jpeg, .jpg - full + image/jpeg + partial comment + exif/photoshop/adobe removal of harmful fields is done with hachoir + Canon Raw tags : + http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/CanonRaw.html + Open Document .odt, .odx, .ods, ... + application/opendocument full a meta.xml file removal of the meta.xml file @@ -26,6 +32,7 @@ Office Openxml .docx, .pptx, .xlsx, ... + application/officeopenxml full a docProps folder containings xml metadata files removal of the docProps folder @@ -34,18 +41,21 @@ Portable Document Fileformat .pdf + application/pdf full a lot - rendering of the pdf file on a cairo surface with the help of - poppler in order to remove all the internal metadata, - then removal of the remaining metadata fields of the pdf itself with - pdfrw (the next version of python-cairo will support metadata, - so we should get rid of pdfrw) + rendering of the PDF file on a cairo surface with the help of + poppler in order to remove all the internal metadata. + For now, cairo create some metadata. + They can be remove if you install either exiftool, or python-pdfrw. + The next version of python-cairo will support PDF metadata. + Tape ARchive .tar, .tar.bz2, .tar.gz + application/x-tar, application/x-gzip, application/x-bzip2 full metadata from the file itself, metadata from the file contained into the archive, and metadata added by tar to the file at then @@ -58,11 +68,11 @@ Zip .zip - .partial + application/zip + partial metadata from the file itself, metadata from the file contained into the archive, and metadata added by zip to the file when added to - the archive. - + the archive. extraction of each file, treatement of the file, add treated file to a new archive. When the new archive is complete, remove all his metadata metadata added by zip itself to internal files @@ -70,7 +80,8 @@ MPEG Audio - .mp3, .mp2, .mp1 + .mp3, .mp2, .mp1, .mpa + audio/mpeg full id3 removal of harmful fields is done with hachoir @@ -79,6 +90,7 @@ Ogg Vorbis .ogg + audio/vorbis full Vorbis removal of harmful fields is done with mutagen @@ -87,8 +99,19 @@ Free Lossless Audio Codec .flac + audio/x-flac full Flac, Vorbis removal of harmful fields is done with mutagen + + + Torrent + .torrent + application/torrent + full + torrent + remove all the compromizing metadata with a heavily tuned version + of the bencode lib by Petru Paled + diff --git a/MAT/mat.py b/MAT/mat.py index 7b5149f..b18cc63 100644 --- a/MAT/mat.py +++ b/MAT/mat.py @@ -30,19 +30,21 @@ LOGGING_LEVEL = logging.DEBUG logging.basicConfig(filename=fname, level=LOGGING_LEVEL) - -def get_sharedir(filename): - ''' - An ugly hack to find various files - ''' - if os.path.isfile(filename): - return filename - elif os.path.exists(os.path.join('/usr/local/share/mat/', filename)): - return os.path.join('/usr/local/share/mat/', filename) - elif os.path.exists(os.path.join('/usr/share/mat/', filename)): - return os.path.join('/usr/share/mat', filename) - elif os.path.exists(os.path.join('/usr/local/share/pixmaps/', filename)): - return os.path.join('/usr/local/share/pixmaps/', filename) +def get_logo(): + if os.path.isfile('./data/mat.png'): + return './data/mat.png' + elif os.path.isfile('/usr/share/pixmaps/mat.png'): + return '/usr/share/pixmaps/mat.png' + elif os.path.isfile('/usr/local/share/pixmaps/mat.png'): + return '/usr/local/share/pixmaps/mat.png' + +def get_formats(): + if os.path.isfile('./data/FORMATS'): + return './data/FORMATS' + elif os.path.isfile('/usr/share/mat/FORMATS'): + return '/usr/share/mat/FORMATS' + elif os.path.isfile('/usr/local/share/mat/FORMATS'): + return '/usr/local/share/mat/FORMATS' class XMLParser(xml.sax.handler.ContentHandler): diff --git a/data/FORMATS b/data/FORMATS new file mode 100644 index 0000000..3f0751c --- /dev/null +++ b/data/FORMATS @@ -0,0 +1,117 @@ + + + Portable Network Graphics + .png + image/png + full + textual metadata + date + removal of harmful fields is done with hachoir + + + + Jpeg + .jpeg, .jpg + image/jpeg + partial + comment + exif/photoshop/adobe + removal of harmful fields is done with hachoir + Canon Raw tags : + http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/CanonRaw.html + + + + + Open Document + .odt, .odx, .ods, ... + application/opendocument + full + a meta.xml file + removal of the meta.xml file + + + + Office Openxml + .docx, .pptx, .xlsx, ... + application/officeopenxml + full + a docProps folder containings xml metadata files + removal of the docProps folder + + + + Portable Document Fileformat + .pdf + application/pdf + full + a lot + rendering of the PDF file on a cairo surface with the help of + poppler in order to remove all the internal metadata. + For now, cairo create some metadata. + They can be remove if you install either exiftool, or python-pdfrw. + The next version of python-cairo will support PDF metadata. + + + + + Tape ARchive + .tar, .tar.bz2, .tar.gz + application/x-tar, application/x-gzip, application/x-bzip2 + full + metadata from the file itself, metadata from the file contained + into the archive, and metadata added by tar to the file at then + creation of the archive + extraction of each file, treatement of the file, add treated file + to a new archive, right before the add, remove the metadata added by tar + itself. When the new archive is complete, remove all his metadata. + + + + Zip + .zip + application/zip + partial + metadata from the file itself, metadata from the file contained + into the archive, and metadata added by zip to the file when added to + the archive. + extraction of each file, treatement of the file, add treated file + to a new archive. When the new archive is complete, remove all his metadata + metadata added by zip itself to internal files + + + + MPEG Audio + .mp3, .mp2, .mp1, .mpa + audio/mpeg + full + id3 + removal of harmful fields is done with hachoir + + + + Ogg Vorbis + .ogg + audio/vorbis + full + Vorbis + removal of harmful fields is done with mutagen + + + + Free Lossless Audio Codec + .flac + audio/x-flac + full + Flac, Vorbis + removal of harmful fields is done with mutagen + + + + Torrent + .torrent + application/torrent + full + torrent + remove all the compromizing metadata with a heavily tuned version + of the bencode lib by Petru Paled + + diff --git a/data/mat.png b/data/mat.png new file mode 100644 index 0000000..4198c42 Binary files /dev/null and b/data/mat.png differ diff --git a/mat b/mat index 5cd61ee..da43198 100755 --- a/mat +++ b/mat @@ -104,7 +104,7 @@ def list_supported(): handler = mat.XMLParser() parser = xml.sax.make_parser() parser.setContentHandler(handler) - path = mat.get_sharedir('FORMATS') + path = mat.get_formats() with open(path, 'r') as xmlfile: parser.parse(xmlfile) diff --git a/mat-gui b/mat-gui index 121db56..be966b0 100755 --- a/mat-gui +++ b/mat-gui @@ -53,7 +53,7 @@ class GUI: self.window.set_title('Metadata Anonymisation Toolkit') self.window.connect('destroy', gtk.main_quit) self.window.set_default_size(800, 600) - self.logo = mat.get_sharedir('mat.png') + self.logo = mat.get_logo() icon = gtk.gdk.pixbuf_new_from_file_at_size(self.logo, 50, 50) self.window.set_icon(icon) @@ -405,7 +405,7 @@ class GUI: handler = mat.XMLParser() parser = xml.sax.make_parser() parser.setContentHandler(handler) - path = mat.get_sharedir('FORMATS') + path = mat.get_formats() with open(path, 'r') as xmlfile: parser.parse(xmlfile) diff --git a/mat.png b/mat.png deleted file mode 100644 index 4198c42..0000000 Binary files a/mat.png and /dev/null differ diff --git a/setup.py b/setup.py index f69a326..5852466 100755 --- a/setup.py +++ b/setup.py @@ -27,8 +27,8 @@ setup( scripts = ['mat', 'mat-gui'], data_files = [ ( 'share/applications', ['mat.desktop'] ), - ( 'share/mat', ['FORMATS'] ), - ( 'share/pixmaps', ['mat.png'] ), + ( 'share/mat', ['data/FORMATS'] ), + ( 'share/pixmaps', ['data/mat.png'] ), ( 'share/doc/mat', ['README', 'TODO'] ), ( 'share/man/man1', ['mat.1', 'mat-gui.1'] ), ], -- cgit v1.3