summaryrefslogtreecommitdiff
path: root/lib/parser.py
diff options
context:
space:
mode:
author jvoisin 2011-06-20 01:25:33 +0200
committer jvoisin 2011-06-20 01:25:33 +0200
commit 52f2fedd5d73807d42ba5c397c3e4c5348b47a47 (patch)
tree 09070e23428f3f72e4a95da8df33520adacbf01f /lib/parser.py
parent de5917e5f01374bb1a647f49ae85283241a2bea9 (diff)
Introduce a clean separation of functions and classes into different files
Diffstat (limited to 'lib/parser.py')
-rw-r--r--lib/parser.py79
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/parser.py b/lib/parser.py
new file mode 100644
index 0000000..828648f
--- /dev/null
+++ b/lib/parser.py
@@ -0,0 +1,79 @@
'''
    Parent class of all parsers
'''

import sys

import hachoir_core.error
import hachoir_core.field
import hachoir_editor
import hachoir_metadata
import hachoir_parser

# Suffix appended to the original filename to name the cleaned copy
# written by Generic_parser.remove_all().
POSTFIX = ".cleaned"

class Generic_parser(object):
    '''
        Parent class of all parsers.

        Holds a hachoir parser and a hachoir editor for one file, extracts
        the file's metadata at construction time, and offers helpers to
        check for / strip the fields that a subclass flags as compromising.

        Subclasses must override _should_remove().
    '''
    def __init__(self, realname, filename, parser, editor):
        '''
            realname: stored as-is on the instance (not used by this class)
            filename: path used in messages and as the base name of the
                      cleaned output file
            parser:   object handed to hachoir_metadata.extractMetadata()
            editor:   iterable of fields supporting "del editor[field.name]";
                      written out by remove_all()

            The process exits with status 1 if no metadata can be
            extracted (see __fill_meta).
        '''
        self.filename = filename
        self.realname = realname
        self.parser = parser
        self.editor = editor
        self.meta = self.__fill_meta()

    def __fill_meta(self):
        '''
            Build and return a dict mapping each metadata key to the text
            of its value.

            Prints an error and calls sys.exit(1) when extraction fails or
            yields nothing.
        '''
        # Pre-bind so that a failed extraction reaches the "not meta"
        # branch below instead of raising NameError.
        meta = None
        try:
            meta = hachoir_metadata.extractMetadata(self.parser)
        except hachoir_core.error.HachoirError as err:
            print("Metadata extraction error: %s" % err)

        if not meta:
            print("Unable to extract metadata from the file %s" % self.filename)
            sys.exit(1)

        metadata = {}
        for title in meta:
            # Skip empty fields; when a field carries several values,
            # only the last one is kept.
            if title.values:
                metadata[title.key] = title.values[-1].text
        return metadata

    def is_clean(self):
        '''
            Return True if no field of the editor is flagged by
            _should_remove(), False otherwise.
        '''
        return not any(self._should_remove(field) for field in self.editor)

    def remove_all(self):
        '''
            Remove every field flagged by _should_remove(), then write the
            result to "<filename><POSTFIX>".
        '''
        # Collect first, delete afterwards: the editor must not be
        # mutated while it is being iterated.
        doomed = [field for field in self.editor if self._should_remove(field)]
        for field in doomed:
            self._remove(field)
        hachoir_core.field.writeIntoFile(self.editor, self.filename + POSTFIX)

    def _remove(self, field):
        '''
            Delete the given field from the editor, looked up by its name.
        '''
        del self.editor[field.name]

    def get_meta(self):
        '''
            Return the dict of metadata built when the object was created.
        '''
        return self.meta

    def _should_remove(self, key):
        '''
            Return True if the given field is compromising.

            Abstract method: subclasses must override it; this base
            implementation always raises NotImplementedError.
        '''
        raise NotImplementedError()