From c186fc42929b2660e5c507adeb8a8fb406593b11 Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Sun, 1 Apr 2018 00:17:06 +0200
Subject: Clean deep metadata for zip files

Every member written to the cleaned archive now goes through
__clean_zipinfo(), which normalises the per-entry fields that can leak
information about the machine that produced the file: compression
method, creating system, entry comment and modification time.

The ZipInfo that is cleaned must be the one handed to writestr(), and
members copied verbatim must be read *before* their ZipInfo is altered:
zipfile picks its decompressor from ZipInfo.compress_type, so changing
it first breaks reading of members that were stored uncompressed.
---
Reviewer notes (ignored by `git am`):
 * This patch was rebuilt from a whitespace-collapsed copy; the
   indentation of context lines is reconstructed, so verify it against
   the tree or apply with `git apply --ignore-whitespace`.
 * The post-image blob ids on the `index` lines are those of the
   original commit and were not recomputed.

 src/libreoffice.py | 13 ++++++++++++-
 src/office.py      | 18 ++++++++++++++++--
 2 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/libreoffice.py b/src/libreoffice.py
index a3481a1..809ae3c 100644
--- a/src/libreoffice.py
+++ b/src/libreoffice.py
@@ -34,6 +34,14 @@ class LibreOfficeParser(abstract.AbstractParser):
         zipin.close()
         return metadata
 
+    def __clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
+        """Reset, in place, the fields of `zipinfo` that leak metadata."""
+        zipinfo.compress_type = zipfile.ZIP_DEFLATED
+        zipinfo.create_system = 3  # Unix
+        zipinfo.comment = b''
+        zipinfo.date_time = (1980, 1, 1, 0, 0, 0)  # earliest zip timestamp
+        return zipinfo
+
     def remove_all(self):
         zin = zipfile.ZipFile(self.filename, 'r')
         zout = zipfile.ZipFile(self.output_filename, 'w')
@@ -51,7 +59,10 @@ class LibreOfficeParser(abstract.AbstractParser):
                 print("%s isn't supported" % item.filename)
                 continue
             tmp_parser.remove_all()
-            zout.write(tmp_parser.output_filename, item.filename)
+            zinfo = zipfile.ZipInfo(item.filename)
+            zinfo = self.__clean_zipinfo(zinfo)  # clean the entry we write
+            with open(tmp_parser.output_filename, 'rb') as f:
+                zout.writestr(zinfo, f.read())
         shutil.rmtree(temp_folder)
         zout.close()
         zin.close()
diff --git a/src/office.py b/src/office.py
index 5de0597..a729f2f 100644
--- a/src/office.py
+++ b/src/office.py
@@ -33,6 +33,14 @@ class OfficeParser(abstract.AbstractParser):
         zipin.close()
         return metadata
 
+    def __clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
+        """Reset, in place, the fields of `zipinfo` that leak metadata."""
+        zipinfo.compress_type = zipfile.ZIP_DEFLATED
+        zipinfo.create_system = 3  # Unix
+        zipinfo.comment = b''
+        zipinfo.date_time = (1980, 1, 1, 0, 0, 0)  # earliest zip timestamp
+        return zipinfo
+
     def remove_all(self):
         zin = zipfile.ZipFile(self.filename, 'r')
         zout = zipfile.ZipFile(self.output_filename, 'w')
@@ -45,6 +53,8 @@ class OfficeParser(abstract.AbstractParser):
                 if not item.filename.endswith('.rels'):
                     continue  # don't keep metadata files
             if item.filename in self.files_to_keep:
-                zout.writestr(item, zin.read(item))
+                data = zin.read(item)  # read before compress_type is altered
+                item = self.__clean_zipinfo(item)
+                zout.writestr(item, data)
                 continue
 
@@ -54,7 +64,11 @@ class OfficeParser(abstract.AbstractParser):
                 print("%s isn't supported" % item.filename)
                 continue
             tmp_parser.remove_all()
-            zout.write(tmp_parser.output_filename, item.filename)
+            zinfo = zipfile.ZipInfo(item.filename)
+            zinfo = self.__clean_zipinfo(zinfo)  # clean the entry we write
+            with open(tmp_parser.output_filename, 'rb') as f:
+                zout.writestr(zinfo, f.read())
+
         shutil.rmtree(temp_folder)
         zout.close()
         zin.close()
-- 
cgit v1.3