diff options
| author | jvoisin | 2018-03-31 15:46:17 +0200 |
|---|---|---|
| committer | jvoisin | 2018-03-31 15:46:17 +0200 |
| commit | f391c9603c36a8ec80942c23ac6ba39fca5df72a (patch) | |
| tree | 7fdc2053c01f103a675274ebd3e6abcffba4dfbe /src/parsers | |
| parent | 088c3d013ce4515920dea5e0becb98b36afa9a31 (diff) | |
Reorganise the source code a bit
Diffstat (limited to 'src/parsers')
| -rw-r--r-- | src/parsers/__init__.py | 0 | ||||
| -rw-r--r-- | src/parsers/abstract.py | 13 | ||||
| -rw-r--r-- | src/parsers/audio.py | 37 | ||||
| -rw-r--r-- | src/parsers/jpg.py | 30 | ||||
| -rw-r--r-- | src/parsers/pdf.py | 89 | ||||
| -rw-r--r-- | src/parsers/png.py | 27 |
6 files changed, 0 insertions, 196 deletions
diff --git a/src/parsers/__init__.py b/src/parsers/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/src/parsers/__init__.py +++ /dev/null | |||
diff --git a/src/parsers/abstract.py b/src/parsers/abstract.py deleted file mode 100644 index c2d282f..0000000 --- a/src/parsers/abstract.py +++ /dev/null | |||
| @@ -1,13 +0,0 @@ | |||
class AbstractParser:
    """Common base for the format-specific parsers.

    It only stores the input path and the derived output path; the two
    operations, `get_meta` and `remove_all`, raise `NotImplementedError`
    here and have to be supplied by subclasses.
    """

    # Both class-level sets are empty here; subclasses replace them.
    mimetypes = set()
    meta_list = set()

    def __init__(self, filename: str):
        """Store `filename` and derive the output path from it.

        The output path is `filename` with '.cleaned' appended.
        """
        self.filename = filename
        self.output_filename = filename + '.cleaned'

    def get_meta(self):
        """Not implemented in the base class."""
        raise NotImplementedError()

    def remove_all(self):
        """Not implemented in the base class."""
        raise NotImplementedError()
diff --git a/src/parsers/audio.py b/src/parsers/audio.py deleted file mode 100644 index 4da298c..0000000 --- a/src/parsers/audio.py +++ /dev/null | |||
| @@ -1,37 +0,0 @@ | |||
| 1 | import subprocess | ||
| 2 | import shutil | ||
| 3 | import json | ||
| 4 | |||
| 5 | import mutagen | ||
| 6 | |||
| 7 | from . import abstract | ||
| 8 | |||
class MutagenParser(abstract.AbstractParser):
    """Parser for audio files that are read and rewritten through mutagen."""

    def get_meta(self):
        """Return the tags mutagen finds in `self.filename`.

        Returns:
            The file's tag container when it holds at least one tag,
            otherwise an empty dict. An empty dict is also returned when
            mutagen cannot identify the file type.
        """
        audio = mutagen.File(self.filename)
        # mutagen.File() returns None, rather than raising, for files whose
        # type it cannot determine.
        if audio is None or not audio.tags:
            return {}
        return audio.tags

    def remove_all(self):
        """Write a tag-free copy of the file to `self.output_filename`.

        The input file is copied first and the tags are deleted from the
        copy, so the original file is never modified.

        Returns:
            True when the copy was cleaned. False when mutagen cannot
            identify the copy; in that case the copy is removed again so
            that no uncleaned '.cleaned' file is left behind.
        """
        import os  # local import: only needed for the failure path below

        shutil.copy(self.filename, self.output_filename)
        audio = mutagen.File(self.output_filename)
        if audio is None:
            os.remove(self.output_filename)
            return False
        audio.delete()
        audio.save()
        return True
| 22 | |||
class MP3Parser(MutagenParser):
    """Parser for MP3 files ('audio/mpeg')."""
    mimetypes = {'audio/mpeg', }

    def get_meta(self):
        """Return a plain dict mapping each tag key to its content.

        For frames exposing a `text` attribute the value is that attribute.
        Frames without one are reported as the raw frame object instead of
        aborting the whole listing with an AttributeError.
        """
        meta = super().get_meta()
        return {key: getattr(meta[key], 'text', meta[key]) for key in meta}
| 32 | |||
class OGGParser(MutagenParser):
    """Parser registered for the 'audio/ogg' mimetype.

    All behaviour is inherited from MutagenParser.
    """
    mimetypes = {'audio/ogg'}
| 35 | |||
class FLACParser(MutagenParser):
    """Parser registered for the 'audio/flac' mimetype.

    All behaviour is inherited from MutagenParser.
    """
    mimetypes = {'audio/flac'}
diff --git a/src/parsers/jpg.py b/src/parsers/jpg.py deleted file mode 100644 index 34fc04c..0000000 --- a/src/parsers/jpg.py +++ /dev/null | |||
| @@ -1,30 +0,0 @@ | |||
| 1 | import subprocess | ||
| 2 | import json | ||
| 3 | |||
| 4 | import gi | ||
| 5 | gi.require_version('GdkPixbuf', '2.0') | ||
| 6 | from gi.repository import GdkPixbuf | ||
| 7 | |||
| 8 | from . import abstract | ||
| 9 | |||
class JPGParser(abstract.AbstractParser):
    """Parser for JPEG images.

    Metadata is read with the external `exiftool` program; the cleaned
    copy is produced by re-encoding the image through GdkPixbuf.
    """
    # 'image/jpeg' is the registered media type and what type detection
    # normally reports; 'image/jpg' is kept for backward compatibility.
    mimetypes = {'image/jpeg', 'image/jpg'}
    # exiftool keys that get_meta() drops from its result.
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', "FileInodeChangeDate",
                      'FilePermissions', 'FileType', 'FileTypeExtension',
                      'MIMEType', 'ImageWidth', 'ImageSize', 'BitsPerSample',
                      'ColorComponents', 'EncodingProcess', 'JFIFVersion',
                      'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
                      'YResolution', 'Megapixels', 'ImageHeight'}

    def get_meta(self):
        """Return exiftool's metadata for the file, minus whitelisted keys.

        Returns:
            A dict built from the first object of `exiftool -json` output,
            with every key listed in `meta_whitelist` removed.

        Raises:
            subprocess.CalledProcessError: if exiftool exits with a
                non-zero status.
        """
        path = self.filename
        if path.startswith('-'):
            # A leading dash would be parsed by exiftool as an option;
            # './' makes it an unambiguous path to the same file.
            path = './' + path
        out = subprocess.check_output(['exiftool', '-json', path])
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_whitelist:
            meta.pop(key, None)
        return meta

    def remove_all(self):
        """Re-encode the image into `self.output_filename`.

        The file is loaded into a GdkPixbuf and saved again as JPEG with
        the "quality" option set to "100".

        Returns:
            True once the new file has been written.
        """
        pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
        pixbuf.savev(self.output_filename, "jpeg", ["quality"], ["100"])
        return True
diff --git a/src/parsers/pdf.py b/src/parsers/pdf.py deleted file mode 100644 index 90f05e1..0000000 --- a/src/parsers/pdf.py +++ /dev/null | |||
| @@ -1,89 +0,0 @@ | |||
| 1 | """ Handle PDF | ||
| 2 | |||
| 3 | """ | ||
| 4 | |||
| 5 | import os | ||
| 6 | import logging | ||
| 7 | import tempfile | ||
| 8 | import shutil | ||
| 9 | import io | ||
| 10 | import tempfile | ||
| 11 | |||
| 12 | import cairo | ||
| 13 | import gi | ||
| 14 | gi.require_version('Poppler', '0.18') | ||
| 15 | from gi.repository import Poppler | ||
| 16 | |||
| 17 | from . import abstract | ||
| 18 | |||
| 19 | logging.basicConfig(level=logging.DEBUG) | ||
| 20 | |||
| 21 | |||
class PDFParser(abstract.AbstractParser):
    """Parser for PDF documents, based on Poppler and cairo."""
    mimetypes = {'application/pdf', }
    # Poppler document properties that get_meta() reports.
    meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
                 'metadata', 'mod-date', 'producer', 'subject', 'title',
                 'viewer-preferences'}

    def __init__(self, filename):
        """Store the paths and pre-compute the file:// URI of the input."""
        super().__init__(filename)
        self.uri = self._path_to_uri(self.filename)
        # Factor by which each page is enlarged when rendered to an image.
        self.__scale = 2

    @staticmethod
    def _path_to_uri(path):
        """Return a percent-escaped file:// URI for `path`.

        Plain 'file://' + path concatenation yields an invalid URI as soon
        as the path contains characters such as '#' or '%'.
        """
        import pathlib  # local import: not part of the module's imports

        return pathlib.Path(os.path.abspath(path)).as_uri()

    def remove_all(self):
        """
        Load the document into Poppler, render pages on PNG,
        and shove those PNG into a new PDF. Metadata from the new
        PDF are removed via Poppler, because there is no way to tell
        cairo to not add "created by cairo" during rendering.

        Returns:
            True once the cleaned PDF has been saved to
            `self.output_filename`.
        """
        document = Poppler.Document.new_from_file(self.uri, None)
        pages_count = document.get_n_pages()

        fd, tmp_path = tempfile.mkstemp()
        # cairo opens the file by path; close our descriptor so it does
        # not leak.
        os.close(fd)
        try:
            pdf_surface = cairo.PDFSurface(tmp_path, 128, 128)
            pdf_context = cairo.Context(pdf_surface)

            for pagenum in range(pages_count):
                page = document.get_page(pagenum)
                page_width, page_height = page.get_size()
                logging.info("Rendering page %d/%d", pagenum + 1, pages_count)

                img_surface = cairo.ImageSurface(
                    cairo.FORMAT_ARGB32,
                    int(page_width) * self.__scale,
                    int(page_height) * self.__scale)
                img_context = cairo.Context(img_surface)

                img_context.scale(self.__scale, self.__scale)
                page.render_for_printing(img_context)
                img_context.show_page()

                # Round-trip the rendered page through an in-memory PNG.
                buf = io.BytesIO()
                img_surface.write_to_png(buf)
                img_surface.finish()
                buf.seek(0)

                img = cairo.ImageSurface.create_from_png(buf)
                # Size the PDF page with the same factor used for rendering.
                pdf_surface.set_size(page_width * self.__scale,
                                     page_height * self.__scale)
                pdf_context.set_source_surface(img, 0, 0)
                pdf_context.paint()
                pdf_context.show_page()

            pdf_surface.finish()

            # Blank the producer/creator fields that cairo put in the
            # intermediate PDF.
            document = Poppler.Document.new_from_file(
                self._path_to_uri(tmp_path), None)
            document.set_producer('')
            document.set_creator('')
            document.save(self._path_to_uri(self.output_filename))
        finally:
            # Remove the intermediate file even when rendering fails.
            os.remove(tmp_path)

        return True

    def get_meta(self):
        """ Return a dict with all the meta of the file

        Only the properties named in `meta_list` whose value is truthy
        are included.
        """
        document = Poppler.Document.new_from_file(self.uri, None)
        metadata = {}
        for key in self.meta_list:
            value = document.get_property(key)
            if value:
                metadata[key] = value
        return metadata
diff --git a/src/parsers/png.py b/src/parsers/png.py deleted file mode 100644 index 377682e..0000000 --- a/src/parsers/png.py +++ /dev/null | |||
| @@ -1,27 +0,0 @@ | |||
| 1 | import subprocess | ||
| 2 | import json | ||
| 3 | |||
| 4 | import cairo | ||
| 5 | |||
| 6 | from . import abstract | ||
| 7 | |||
class PNGParser(abstract.AbstractParser):
    """Parser for PNG images.

    Metadata is read with the external `exiftool` program; the cleaned
    copy is produced by loading and re-writing the image with cairo.
    """
    mimetypes = {'image/png', }
    # exiftool keys that get_meta() drops from its result.
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', "FileInodeChangeDate",
                      'FilePermissions', 'FileType', 'FileTypeExtension',
                      'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType',
                      'Compression', 'Filter', 'Interlace', 'BackgroundColor',
                      'ImageSize', 'Megapixels', 'ImageHeight'}

    def get_meta(self):
        """Return exiftool's metadata for the file, minus whitelisted keys.

        Returns:
            A dict built from the first object of `exiftool -json` output,
            with every key listed in `meta_whitelist` removed.

        Raises:
            subprocess.CalledProcessError: if exiftool exits with a
                non-zero status.
        """
        path = self.filename
        if path.startswith('-'):
            # A leading dash would be parsed by exiftool as an option;
            # './' makes it an unambiguous path to the same file.
            path = './' + path
        out = subprocess.check_output(['exiftool', '-json', path])
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_whitelist:
            meta.pop(key, None)
        return meta

    def remove_all(self):
        """Load the PNG into a cairo surface and write it back out.

        Returns:
            True once the image has been written to
            `self.output_filename`.
        """
        surface = cairo.ImageSurface.create_from_png(self.filename)
        surface.write_to_png(self.output_filename)
        return True
