summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: jvoisin, 2018-04-01 00:17:06 +0200
committer: jvoisin, 2018-04-01 00:17:06 +0200
commit: c186fc42929b2660e5c507adeb8a8fb406593b11 (patch)
tree: c82d071d61ceec3a20d48961390e73f6139f2136 /src
parent: 6d506b87575ded3a59c9fc4f7b28d4160d9e9c43 (diff)
Clean deep metadata for zip files
Diffstat (limited to 'src')
-rw-r--r-- src/libreoffice.py 12
-rw-r--r-- src/office.py 14
2 files changed, 24 insertions, 2 deletions
diff --git a/src/libreoffice.py b/src/libreoffice.py
index a3481a1..809ae3c 100644
--- a/src/libreoffice.py
+++ b/src/libreoffice.py
@@ -34,6 +34,13 @@ class LibreOfficeParser(abstract.AbstractParser):
34 zipin.close() 34 zipin.close()
35 return metadata 35 return metadata
36 36
37 def __clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
38 zipinfo.compress_type = zipfile.ZIP_DEFLATED
39 zipinfo.create_system = 3 # Linux
40 zipinfo.comment = b''
41 zipinfo.date_time = (1980, 1, 1, 0, 0, 0)
42 return zipinfo
43
37 def remove_all(self): 44 def remove_all(self):
38 zin = zipfile.ZipFile(self.filename, 'r') 45 zin = zipfile.ZipFile(self.filename, 'r')
39 zout = zipfile.ZipFile(self.output_filename, 'w') 46 zout = zipfile.ZipFile(self.output_filename, 'w')
@@ -51,7 +58,10 @@ class LibreOfficeParser(abstract.AbstractParser):
51 print("%s isn't supported" % item.filename) 58 print("%s isn't supported" % item.filename)
52 continue 59 continue
53 tmp_parser.remove_all() 60 tmp_parser.remove_all()
54 zout.write(tmp_parser.output_filename, item.filename) 61 zinfo = zipfile.ZipInfo(item.filename)
62 item = self.__clean_zipinfo(item)
63 with open(tmp_parser.output_filename, 'rb') as f:
64 zout.writestr(zinfo, f.read())
55 shutil.rmtree(temp_folder) 65 shutil.rmtree(temp_folder)
56 zout.close() 66 zout.close()
57 zin.close() 67 zin.close()
diff --git a/src/office.py b/src/office.py
index 5de0597..a729f2f 100644
--- a/src/office.py
+++ b/src/office.py
@@ -33,6 +33,13 @@ class OfficeParser(abstract.AbstractParser):
33 zipin.close() 33 zipin.close()
34 return metadata 34 return metadata
35 35
36 def __clean_zipinfo(self, zipinfo:zipfile.ZipInfo) -> zipfile.ZipInfo:
37 zipinfo.compress_type = zipfile.ZIP_DEFLATED
38 zipinfo.create_system = 3 # Linux
39 zipinfo.comment = b''
40 zipinfo.date_time = (1980, 1, 1, 0, 0, 0)
41 return zipinfo
42
36 def remove_all(self): 43 def remove_all(self):
37 zin = zipfile.ZipFile(self.filename, 'r') 44 zin = zipfile.ZipFile(self.filename, 'r')
38 zout = zipfile.ZipFile(self.output_filename, 'w') 45 zout = zipfile.ZipFile(self.output_filename, 'w')
@@ -45,6 +52,7 @@ class OfficeParser(abstract.AbstractParser):
45 if not item.filename.endswith('.rels'): 52 if not item.filename.endswith('.rels'):
46 continue # don't keep metadata files 53 continue # don't keep metadata files
47 if item.filename in self.files_to_keep: 54 if item.filename in self.files_to_keep:
55 item = self.__clean_zipinfo(item)
48 zout.writestr(item, zin.read(item)) 56 zout.writestr(item, zin.read(item))
49 continue 57 continue
50 58
@@ -54,7 +62,11 @@ class OfficeParser(abstract.AbstractParser):
54 print("%s isn't supported" % item.filename) 62 print("%s isn't supported" % item.filename)
55 continue 63 continue
56 tmp_parser.remove_all() 64 tmp_parser.remove_all()
57 zout.write(tmp_parser.output_filename, item.filename) 65 zinfo = zipfile.ZipInfo(item.filename)
66 item = self.__clean_zipinfo(item)
67 with open(tmp_parser.output_filename, 'rb') as f:
68 zout.writestr(zinfo, f.read())
69
58 shutil.rmtree(temp_folder) 70 shutil.rmtree(temp_folder)
59 zout.close() 71 zout.close()
60 zin.close() 72 zin.close()