From 4c81e731a485d3ea84049ef6d568153c8b10e90b Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Sun, 27 Oct 2013 23:01:20 +0000
Subject: Improves documentation

---
 MAT/archive.py         | 25 +++++++++++--------------
 MAT/audio.py           | 21 +++++++--------------
 MAT/images.py          | 45 ++++++++++++++++++++++++++++-----------------
 MAT/mat.py             | 50 +++++++++++++++++++++++---------------------------
 MAT/mutagenstripper.py |  4 +++-
 MAT/office.py          | 30 ++++++++++--------------------
 MAT/parser.py          | 33 +++++++++++++++------------------
 MAT/strippers.py       | 11 +++++------
 8 files changed, 102 insertions(+), 117 deletions(-)

diff --git a/MAT/archive.py b/MAT/archive.py
index 447f068..f07e18c 100644
--- a/MAT/archive.py
+++ b/MAT/archive.py
@@ -1,21 +1,19 @@
-'''
-    Take care of archives formats
+''' Take care of archives formats
 '''
 
-import zipfile
-import shutil
-import os
 import logging
+import os
+import shutil
 import tempfile
+import zipfile
 
-import parser
 import mat
+import parser
 import tarfile
 
 
 class GenericArchiveStripper(parser.GenericParser):
-    '''
-        Represent a generic archive
+    ''' Represent a generic archive
     '''
     def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
         super(GenericArchiveStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
@@ -24,8 +22,7 @@ class GenericArchiveStripper(parser.GenericParser):
         self.tempdir = tempfile.mkdtemp()
 
     def __del__(self):
-        '''
-            Remove the files inside the temp dir,
+        ''' Remove the files inside the temp dir,
             then remove the temp dir
         '''
         for root, dirs, files in os.walk(self.tempdir):
@@ -35,16 +32,16 @@ class GenericArchiveStripper(parser.GenericParser):
         shutil.rmtree(self.tempdir)
 
     def remove_all(self):
+        ''' Virtual method to remove all metadata
+        '''
         raise NotImplementedError
 
 
 class ZipStripper(GenericArchiveStripper):
-    '''
-        Represent a zip file
+    ''' Represent a zip file
     '''
     def is_file_clean(self, fileinfo):
-        '''
-            Check if a ZipInfo object is clean of metadatas added
+        ''' Check if a ZipInfo object is clean of metadatas added
             by zip itself, independently of the corresponding file metadatas
         '''
         if fileinfo.comment:
diff --git a/MAT/audio.py b/MAT/audio.py
index 3c6c7bc..dae9d75 100644
--- a/MAT/audio.py
+++ b/MAT/audio.py
@@ -1,5 +1,4 @@
-'''
-    Care about audio fileformat
+''' Care about audio fileformat
 '''
 
 try:
@@ -13,31 +12,27 @@ import mutagenstripper
 
 
 class MpegAudioStripper(parser.GenericParser):
-    '''
-        Represent mpeg audio file (mp3, ...)
+    ''' Represent mpeg audio file (mp3, ...)
     '''
     def _should_remove(self, field):
         return field.name in ("id3v1", "id3v2")
 
 
 class OggStripper(mutagenstripper.MutagenStripper):
-    '''
-        Represent an ogg vorbis file
+    ''' Represent an ogg vorbis file
     '''
     def _create_mfile(self):
         self.mfile = OggVorbis(self.filename)
 
 
 class FlacStripper(mutagenstripper.MutagenStripper):
-    '''
-        Represent a Flac audio file
+    ''' Represent a Flac audio file
     '''
     def _create_mfile(self):
         self.mfile = FLAC(self.filename)
 
     def remove_all(self):
-        '''
-            Remove the "metadata" block from the file
+        ''' Remove the "metadata" block from the file
         '''
         super(FlacStripper, self).remove_all()
         self.mfile.clear_pictures()
@@ -45,14 +40,12 @@ class FlacStripper(mutagenstripper.MutagenStripper):
         return True
 
     def is_clean(self):
-        '''
-            Check if the "metadata" block is present in the file
+        ''' Check if the "metadata" block is present in the file
         '''
         return super(FlacStripper, self).is_clean() and not self.mfile.pictures
 
     def get_meta(self):
-        '''
-            Return the content of the metadata block if present
+        ''' Return the content of the metadata block if present
         '''
         metadata = super(FlacStripper, self).get_meta()
         if self.mfile.pictures:
diff --git a/MAT/images.py b/MAT/images.py
index 55c1a90..dc96e6a 100644
--- a/MAT/images.py
+++ b/MAT/images.py
@@ -1,41 +1,52 @@
-'''
-    Takes care about pictures formats
+''' Takes care of picture formats
+
+References:
+    - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm
+    - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html
+    - PNG: http://www.w3.org/TR/PNG-Chunks.html
 '''
 
 import parser
 
 
 class JpegStripper(parser.GenericParser):
-    '''
-        represents a jpeg file
+    ''' Represents a jpeg file.
+        Custom Huffman and Quantization tables
+        are stripped: they may leak
+        some info, and the quality loss is minor.
     '''
     def _should_remove(self, field):
+        ''' Return True if the field is compromising
         '''
-            return True if the field is compromising
-        '''
-        field_list = frozenset(['start_image', 'app0', 'start_frame',
-                'start_scan', 'data', 'end_image'])
+        field_list = frozenset([
+            'start_image',  # start of the image
+            'app0',         # JFIF data
+            'start_frame',  # specify width, height, number of components
+            'start_scan',   # specify which slice of data the top-to-bottom scan contains
+            'data',         # actual data
+            'end_image'])   # end of the image
         if field.name in field_list:
             return False
-        elif field.name.startswith('quantization['):
+        elif field.name.startswith('quantization['):  # custom Quant. tables
             return False
-        elif field.name.startswith('huffman['):
+        elif field.name.startswith('huffman['):  # custom Huffman tables
             return False
         return True
 
 
 class PngStripper(parser.GenericParser):
-    '''
-        represents a png file
-        see : http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html
+    ''' Represents a png file
     '''
     def _should_remove(self, field):
+        ''' Return True if the field is compromising
         '''
-            return True if the field is compromising
-        '''
-        field_list = frozenset(['id', 'header', 'physical', 'end'])
+        field_list = frozenset([
+            'id',
+            'header',   # PNG header
+            'physical', # the intended pixel size or aspect ratio
+            'end'])     # end of the image
         if field.name in field_list:
             return False
-        if field.name.startswith('data['):
+        if field.name.startswith('data['):  # data
             return False
         return True
diff --git a/MAT/mat.py b/MAT/mat.py
index a1dc111..a669515 100644
--- a/MAT/mat.py
+++ b/MAT/mat.py
@@ -1,13 +1,12 @@
 #!/usr/bin/env python
 
-'''
-    Metadata anonymisation toolkit library
+''' Metadata anonymisation toolkit library
 '''
 
-import os
-import subprocess
 import logging
 import mimetypes
+import os
+import subprocess
 import xml.sax
 
 import hachoir_core.cmd_line
@@ -33,6 +32,8 @@ logging.basicConfig(filename=fname, level=LOGGING_LEVEL)
 import strippers  # this is loaded here because we need LOGGING_LEVEL
 
 def get_logo():
+    ''' Return the path to the logo
+    '''
     if os.path.isfile('./data/mat.png'):
         return './data/mat.png'
     elif os.path.isfile('/usr/share/pixmaps/mat.png'):
@@ -41,6 +42,8 @@ def get_logo():
         return '/usr/local/share/pixmaps/mat.png'
 
 def get_datadir():
+    ''' Return the path to the data directory
+    '''
     if os.path.isdir('./data/'):
         return './data/'
     elif os.path.isdir('/usr/local/share/mat/'):
@@ -49,8 +52,9 @@ def get_datadir():
         return '/usr/share/mat/'
 
 def list_supported_formats():
-    '''
-        Return a list of all locally supported fileformat
+    ''' Return a list of all locally supported fileformat.
+        It parses the FORMATS file, and removes locally
+        non-supported formats.
     '''
     handler = XMLParser()
     parser = xml.sax.make_parser()
@@ -67,8 +71,7 @@ def list_supported_formats():
     return localy_supported
 
 class XMLParser(xml.sax.handler.ContentHandler):
-    '''
-        Parse the supported format xml, and return a corresponding
+    ''' Parse the supported format xml, and return a corresponding
         list of dict
     '''
     def __init__(self):
@@ -78,18 +81,16 @@ class XMLParser(xml.sax.handler.ContentHandler):
         self.between = False
 
     def startElement(self, name, attrs):
-        '''
-            Called when entering into xml tag
+        ''' Called when entering into xml tag
         '''
         self.between = True
         self.key = name
         self.content = ''
 
     def endElement(self, name):
+        ''' Called when exiting a xml tag
         '''
-            Called when exiting a xml tag
-        '''
-        if name == 'format':  # exiting a fileformat section
+        if name == 'format':  # leaving a fileformat section
             self.list.append(self.dict.copy())
             self.dict.clear()
         else:
@@ -98,19 +99,17 @@ class XMLParser(xml.sax.handler.ContentHandler):
             self.between = False
 
     def characters(self, characters):
-        '''
-            Concatenate the content between opening and closing tags
+        ''' Concatenate the content between opening and closing tags
         '''
         if self.between:
             self.content += characters
 
 
 def secure_remove(filename):
-    '''
-        securely remove the file
+    ''' Securely remove the file
     '''
     try:
-        if subprocess.call(['shred', '--remove', filename]) == 0:
+        if not subprocess.call(['shred', '--remove', filename]):
             return True
         else:
             raise OSError
@@ -126,22 +125,17 @@ def secure_remove(filename):
 
 
 def create_class_file(name, backup, **kwargs):
-    '''
-        return a $FILETYPEStripper() class,
+    ''' Return a $FILETYPEStripper() class,
         corresponding to the filetype of the given file
     '''
-    if not os.path.isfile(name):
-        # check if the file exists
+    if not os.path.isfile(name):  # check if the file exists
         logging.error('%s is not a valid file' % name)
         return None
 
-    if not os.access(name, os.R_OK):
-        #check read permissions
+    if not os.access(name, os.R_OK):  #check read permissions
         logging.error('%s is is not readable' % name)
         return None
 
-    is_writable = os.access(name, os.W_OK)
-
     if not os.path.getsize(name):
         #check if the file is not empty (hachoir crash on empty files)
         logging.error('%s is empty' % name)
@@ -161,7 +155,7 @@ def create_class_file(name, backup, **kwargs):
     mime = parser.mime_type
 
     if mime == 'application/zip':  # some formats are zipped stuff
-        if mimetypes.guess_type(name)[0] is not None:
+        if mimetypes.guess_type(name)[0]:
             mime =  mimetypes.guess_type(name)[0]
 
     if mime.startswith('application/vnd.oasis.opendocument'):
@@ -169,6 +163,8 @@ def create_class_file(name, backup, **kwargs):
     elif mime.startswith('application/vnd.openxmlformats-officedocument'):
         mime = 'application/officeopenxml'  # office openxml
 
+    is_writable = os.access(name, os.W_OK)
+
     try:
         stripper_class = strippers.STRIPPERS[mime]
     except KeyError:
diff --git a/MAT/mutagenstripper.py b/MAT/mutagenstripper.py
index ebc6b91..403c9a7 100644
--- a/MAT/mutagenstripper.py
+++ b/MAT/mutagenstripper.py
@@ -1,5 +1,7 @@
+''' Take care of mutagen-supported formats (audio)
+'''
+
 import parser
-import shutil
 
 
 class MutagenStripper(parser.GenericParser):
diff --git a/MAT/office.py b/MAT/office.py
index 583e0f9..91e49be 100644
--- a/MAT/office.py
+++ b/MAT/office.py
@@ -1,5 +1,4 @@
-'''
-    Care about office's formats
+''' Care about office's formats
 '''
 
 import os
@@ -23,14 +22,12 @@ import archive
 
 
 class OpenDocumentStripper(archive.GenericArchiveStripper):
-    '''
-        An open document file is a zip, with xml file into.
+    ''' An open document file is a zip, with xml files inside.
         The one that interest us is meta.xml
     '''
 
     def get_meta(self):
-        '''
-            Return a dict with all the meta of the file by
+        ''' Return a dict with all the meta of the file by
             trying to read the meta.xml file.
         '''
         zipin = zipfile.ZipFile(self.filename, 'r')
@@ -103,8 +100,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
         return True
 
     def is_clean(self):
-        '''
-            Check if the file is clean from harmful metadatas
+        ''' Check if the file is clean from harmful metadatas
         '''
         zipin = zipfile.ZipFile(self.filename, 'r')
         try:
@@ -120,8 +116,7 @@ class OpenDocumentStripper(archive.GenericArchiveStripper):
 
 
 class PdfStripper(parser.GenericParser):
-    '''
-        Represent a PDF file
+    ''' Represent a PDF file
     '''
     def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
         super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
@@ -137,8 +132,7 @@ class PdfStripper(parser.GenericParser):
             'producer', 'metadata'])
 
     def is_clean(self):
-        '''
-            Check if the file is clean from harmful metadatas
+        ''' Check if the file is clean from harmful metadatas
         '''
         for key in self.meta_list:
             if self.document.get_property(key):
@@ -146,8 +140,7 @@ class PdfStripper(parser.GenericParser):
         return True
 
     def remove_all(self):
-        '''
-            Opening the PDF with poppler, then doing a render
+        ''' Opening the PDF with poppler, then doing a render
             on a cairo pdfsurface for each pages.
 
             http://cairographics.org/documentation/pycairo/2/
@@ -195,8 +188,7 @@ pdfrw' % self.output)
         return True
 
     def get_meta(self):
-        '''
-            Return a dict with all the meta of the file
+        ''' Return a dict with all the meta of the file
         '''
         metadata = {}
         for key in self.meta_list:
@@ -252,8 +244,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
         return True
 
     def is_clean(self):
-        '''
-            Check if the file is clean from harmful metadatas
+        ''' Check if the file is clean from harmful metadatas
         '''
         zipin = zipfile.ZipFile(self.filename, 'r')
         for item in zipin.namelist():
@@ -265,8 +256,7 @@ class OpenXmlStripper(archive.GenericArchiveStripper):
         return czf.is_clean()
 
     def get_meta(self):
-        '''
-            Return a dict with all the meta of the file
+        ''' Return a dict with all the meta of the file
         '''
         zipin = zipfile.ZipFile(self.filename, 'r')
         metadata = {}
diff --git a/MAT/parser.py b/MAT/parser.py
index c1c3f4c..ae07d7e 100644
--- a/MAT/parser.py
+++ b/MAT/parser.py
@@ -1,22 +1,22 @@
+''' Parent class of all parsers
 '''
-    Parent class of all parser
-'''
-
-import hachoir_core
-import hachoir_editor
 
 import os
-import tempfile
 import shutil
+import tempfile
+
+import hachoir_core
+import hachoir_editor
 
 import mat
 
-NOMETA = frozenset(('.bmp',  # image
-          '.rdf',  # text
-          '.txt',  # plain text
-          '.xml',  # formated text (XML)
-          '.rels',  # openXML formated text
-          ))
+NOMETA = frozenset((
+    '.bmp',  # "raw" image
+    '.rdf',  # text
+    '.txt',  # plain text
+    '.xml',  # formatted text (XML)
+    '.rels', # openXML formatted text
+))
 
 FIELD = object()
 
@@ -92,8 +92,7 @@ class GenericParser(object):
         del fieldset[field]
 
     def get_meta(self):
-        '''
-            Return a dict with all the meta of the file
+        ''' Return a dict with all the meta of the file
         '''
         metadata = {}
         self._get_meta(self.editor, metadata)
@@ -113,8 +112,7 @@ class GenericParser(object):
                 self._get_meta(field, None)
 
     def _should_remove(self, key):
-        '''
-            Return True if the field is compromising
+        ''' Return True if the field is compromising
             abstract method
         '''
         raise NotImplementedError
@@ -125,8 +123,7 @@ class GenericParser(object):
         shutil.copy2(self.filename, self.filename + '.bak')
 
     def do_backup(self):
-        '''
-            Keep a backup of the file if asked.
+        ''' Keep a backup of the file if asked.
 
             The process of double-renaming is not very elegant,
             but it greatly simplify new strippers implementation.
diff --git a/MAT/strippers.py b/MAT/strippers.py
index f6ae899..78113ff 100644
--- a/MAT/strippers.py
+++ b/MAT/strippers.py
@@ -1,16 +1,15 @@
-'''
-    Manage which fileformat can be processed
+''' Manage which fileformat can be processed
 '''
 
-import images
+import archive
 import audio
 import gi
-import office
-import archive
+import images
+import logging
 import mat
 import misc
+import office
 import subprocess
-import logging
 
 STRIPPERS = {
     'application/x-tar': archive.TarStripper,
-- 
cgit v1.3