summaryrefslogtreecommitdiff
path: root/src/parsers
diff options
context:
space:
mode:
authorjvoisin2018-03-31 15:46:17 +0200
committerjvoisin2018-03-31 15:46:17 +0200
commitf391c9603c36a8ec80942c23ac6ba39fca5df72a (patch)
tree7fdc2053c01f103a675274ebd3e6abcffba4dfbe /src/parsers
parent088c3d013ce4515920dea5e0becb98b36afa9a31 (diff)
Change a bit the source code organisation
Diffstat (limited to 'src/parsers')
-rw-r--r--src/parsers/__init__.py0
-rw-r--r--src/parsers/abstract.py13
-rw-r--r--src/parsers/audio.py37
-rw-r--r--src/parsers/jpg.py30
-rw-r--r--src/parsers/pdf.py89
-rw-r--r--src/parsers/png.py27
6 files changed, 0 insertions, 196 deletions
diff --git a/src/parsers/__init__.py b/src/parsers/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/src/parsers/__init__.py
+++ /dev/null
diff --git a/src/parsers/abstract.py b/src/parsers/abstract.py
deleted file mode 100644
index c2d282f..0000000
--- a/src/parsers/abstract.py
+++ /dev/null
@@ -1,13 +0,0 @@
class AbstractParser:
    """Common base for the format-specific metadata parsers.

    Subclasses are expected to override ``get_meta`` and ``remove_all``;
    the versions defined here only raise ``NotImplementedError``.
    """

    # Empty on the base class; format-specific subclasses override these.
    meta_list = set()
    mimetypes = set()

    def __init__(self, filename: str):
        """Remember the input path and derive the output path from it.

        The output path is the input path with ``.cleaned`` appended.
        """
        cleaned_suffix = '.cleaned'
        self.filename = filename
        self.output_filename = filename + cleaned_suffix

    def get_meta(self):
        """Not implemented on the base class."""
        raise NotImplementedError

    def remove_all(self):
        """Not implemented on the base class."""
        raise NotImplementedError
diff --git a/src/parsers/audio.py b/src/parsers/audio.py
deleted file mode 100644
index 4da298c..0000000
--- a/src/parsers/audio.py
+++ /dev/null
@@ -1,37 +0,0 @@
1import subprocess
2import shutil
3import json
4
5import mutagen
6
7from . import abstract
8
class MutagenParser(abstract.AbstractParser):
    """Read and strip audio tags through mutagen."""

    @staticmethod
    def _open(path):
        """Open *path* with mutagen and return the resulting file object.

        ``mutagen.File`` returns ``None`` when it cannot determine the file
        type; turn that into an explicit error instead of letting callers
        fail later with an ``AttributeError`` on ``None``.

        Raises:
            ValueError: if mutagen does not recognise the file.
        """
        audio = mutagen.File(path)
        if audio is None:
            raise ValueError('%s is not an audio file mutagen can handle' % path)
        return audio

    def get_meta(self):
        """Return the tags of ``self.filename``, or ``{}`` when it has none.

        Raises:
            ValueError: if mutagen does not recognise the file.
        """
        tags = self._open(self.filename).tags
        return tags if tags else {}

    def remove_all(self):
        """Copy the file to ``self.output_filename`` and delete the copy's tags.

        Returns:
            True once the stripped copy has been saved.

        Raises:
            ValueError: if mutagen does not recognise the file.
        """
        import os  # function-scope import: this module has no file-level `import os`

        shutil.copy(self.filename, self.output_filename)
        try:
            audio = self._open(self.output_filename)
            audio.delete()
            audio.save()
        except Exception:
            # Never leave behind a ".cleaned" copy that still carries its tags.
            os.remove(self.output_filename)
            raise
        return True
22
class MP3Parser(MutagenParser):
    """MP3 files: report each tag frame through its ``text`` value."""

    mimetypes = {'audio/mpeg', }

    def get_meta(self):
        """Return a dict mapping each tag key to the frame's ``text``.

        Frames that have no ``text`` attribute (for example embedded
        pictures) are reported as the frame object itself, so they still
        show up in the result instead of raising ``AttributeError``.
        """
        frames = super().get_meta()
        metadata = {}
        for key in frames:
            frame = frames[key]
            metadata[key] = getattr(frame, 'text', frame)
        return metadata
32
class OGGParser(MutagenParser):
    """Ogg audio; tag reading and stripping are inherited unchanged."""

    mimetypes = {'audio/ogg'}
35
class FLACParser(MutagenParser):
    """FLAC audio; tag reading and stripping are inherited unchanged."""

    mimetypes = {'audio/flac'}
diff --git a/src/parsers/jpg.py b/src/parsers/jpg.py
deleted file mode 100644
index 34fc04c..0000000
--- a/src/parsers/jpg.py
+++ /dev/null
@@ -1,30 +0,0 @@
1import subprocess
2import json
3
4import gi
5gi.require_version('GdkPixbuf', '2.0')
6from gi.repository import GdkPixbuf
7
8from . import abstract
9
class JPGParser(abstract.AbstractParser):
    """JPEG files: metadata is listed with exiftool and removed by re-encoding."""

    # 'image/jpeg' is the registered media type (and what Python's
    # `mimetypes` module reports for .jpg/.jpeg); 'image/jpg' is kept so
    # existing lookups keep working.
    mimetypes = {'image/jpeg', 'image/jpg'}

    # exiftool keys that get_meta() drops from its result.
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', 'FileInodeChangeDate',
                      'FilePermissions', 'FileType', 'FileTypeExtension',
                      'MIMEType', 'ImageWidth', 'ImageSize', 'BitsPerSample',
                      'ColorComponents', 'EncodingProcess', 'JFIFVersion',
                      'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
                      'YResolution', 'Megapixels', 'ImageHeight'}

    def get_meta(self):
        """Return exiftool's JSON report for the file, minus whitelisted keys.

        Raises:
            subprocess.CalledProcessError: if exiftool exits with an error.
        """
        import os  # function-scope import: this module has no file-level `import os`

        # An absolute path can never start with '-', so exiftool cannot
        # mistake a hostile file name for one of its own options.
        path = os.path.abspath(self.filename)
        out = subprocess.check_output(['exiftool', '-json', path])
        report = json.loads(out.decode('utf-8'))[0]
        return {key: value for key, value in report.items()
                if key not in self.meta_whitelist}

    def remove_all(self):
        """Load the image with GdkPixbuf and save it as JPEG to ``self.output_filename``.

        Returns:
            True once the image has been written.
        """
        pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
        pixbuf.savev(self.output_filename, "jpeg", ["quality"], ["100"])
        return True
diff --git a/src/parsers/pdf.py b/src/parsers/pdf.py
deleted file mode 100644
index 90f05e1..0000000
--- a/src/parsers/pdf.py
+++ /dev/null
@@ -1,89 +0,0 @@
1""" Handle PDF
2
3"""
4
5import os
6import logging
7import tempfile
8import shutil
9import io
10import tempfile
11
12import cairo
13import gi
14gi.require_version('Poppler', '0.18')
15from gi.repository import Poppler
16
17from . import abstract
18
19logging.basicConfig(level=logging.DEBUG)
20
21
class PDFParser(abstract.AbstractParser):
    """PDF files: metadata is read via Poppler and removed by re-rendering."""

    mimetypes = {'application/pdf', }
    # Poppler document properties reported by get_meta().
    meta_list = {'author', 'creation-date', 'creator', 'format', 'keywords',
                 'metadata', 'mod-date', 'producer', 'subject', 'title',
                 'viewer-preferences'}

    def __init__(self, filename):
        super().__init__(filename)
        self.uri = self._path_to_uri(self.filename)
        # Factor applied to the page dimensions when rasterising.
        self.__scale = 2

    @staticmethod
    def _path_to_uri(path):
        """Return a percent-encoded ``file://`` URI for *path*.

        Plain ``'file://' + path`` yields an invalid URI as soon as the path
        contains characters such as spaces, ``#`` or ``%``.
        """
        from pathlib import Path  # function-scope import: not imported at file level

        return Path(os.path.abspath(path)).as_uri()

    def remove_all(self):
        """
        Load the document into Poppler, render pages on PNG,
        and shove those PNG into a new PDF. Metadata from the new
        PDF are removed via Poppler, because there is no way to tell
        cairo to not add "created by cairo" during rendering.

        Returns True once the cleaned PDF has been saved to
        ``self.output_filename``.
        """
        document = Poppler.Document.new_from_file(self.uri, None)
        pages_count = document.get_n_pages()

        # mkstemp() hands back an open descriptor; only the path is needed
        # here, so close it right away instead of leaking it.
        tmp_fd, tmp_path = tempfile.mkstemp()
        os.close(tmp_fd)
        try:
            pdf_surface = cairo.PDFSurface(tmp_path, 128, 128)
            pdf_context = cairo.Context(pdf_surface)

            for pagenum in range(pages_count):
                page = document.get_page(pagenum)
                page_width, page_height = page.get_size()
                logging.info("Rendering page %d/%d", pagenum + 1, pages_count)

                img_surface = cairo.ImageSurface(
                    cairo.FORMAT_ARGB32,
                    int(page_width) * self.__scale,
                    int(page_height) * self.__scale)
                img_context = cairo.Context(img_surface)

                img_context.scale(self.__scale, self.__scale)
                page.render_for_printing(img_context)
                img_context.show_page()

                # Round-trip the rendered page through an in-memory PNG.
                buf = io.BytesIO()
                img_surface.write_to_png(buf)
                img_surface.finish()
                buf.seek(0)

                img = cairo.ImageSurface.create_from_png(buf)
                # Size the PDF page with the same factor used for rendering.
                pdf_surface.set_size(page_width * self.__scale,
                                     page_height * self.__scale)
                pdf_context.set_source_surface(img, 0, 0)
                pdf_context.paint()
                pdf_context.show_page()

            pdf_surface.finish()

            # This is removing metadata added by cairo
            document = Poppler.Document.new_from_file(
                self._path_to_uri(tmp_path), None)
            document.set_producer('')
            document.set_creator('')
            document.save(self._path_to_uri(self.output_filename))
        finally:
            # Remove the intermediate file whether or not rendering succeeded.
            os.remove(tmp_path)

        return True

    def get_meta(self):
        """ Return a dict with all the meta of the file

        Only the properties named in ``meta_list`` whose value is truthy
        are included.
        """
        document = Poppler.Document.new_from_file(self.uri, None)
        metadata = {}
        for key in self.meta_list:
            value = document.get_property(key)
            if value:
                metadata[key] = value
        return metadata
diff --git a/src/parsers/png.py b/src/parsers/png.py
deleted file mode 100644
index 377682e..0000000
--- a/src/parsers/png.py
+++ /dev/null
@@ -1,27 +0,0 @@
1import subprocess
2import json
3
4import cairo
5
6from . import abstract
7
class PNGParser(abstract.AbstractParser):
    """PNG files: metadata is listed with exiftool and removed by rewriting via cairo."""

    mimetypes = {'image/png', }

    # exiftool keys that get_meta() drops from its result.
    meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', 'FileInodeChangeDate',
                      'FilePermissions', 'FileType', 'FileTypeExtension',
                      'MIMEType', 'ImageWidth', 'BitDepth', 'ColorType',
                      'Compression', 'Filter', 'Interlace', 'BackgroundColor',
                      'ImageSize', 'Megapixels', 'ImageHeight'}

    def get_meta(self):
        """Return exiftool's JSON report for the file, minus whitelisted keys.

        Raises:
            subprocess.CalledProcessError: if exiftool exits with an error.
        """
        import os  # function-scope import: this module has no file-level `import os`

        # An absolute path can never start with '-', so exiftool cannot
        # mistake a hostile file name for one of its own options.
        path = os.path.abspath(self.filename)
        out = subprocess.check_output(['exiftool', '-json', path])
        report = json.loads(out.decode('utf-8'))[0]
        return {key: value for key, value in report.items()
                if key not in self.meta_whitelist}

    def remove_all(self):
        """Load the PNG into a cairo surface and write it to ``self.output_filename``.

        Returns:
            True once the image has been written.
        """
        surface = cairo.ImageSurface.create_from_png(self.filename)
        surface.write_to_png(self.output_filename)
        return True