diff options
| author | jvoisin | 2011-06-30 19:28:24 +0200 |
|---|---|---|
| committer | jvoisin | 2011-06-30 19:28:24 +0200 |
| commit | 347daf394f9e770ad031601b95abd32b96609adb (patch) | |
| tree | 6044c872877c5790b1292b6bd6a71c4649cc8519 /lib/misc.py | |
| parent | 3bc343109528964f6bec075e83106d8f54710b99 (diff) | |
Almost done with pdf. Can't figure out how to get rid of the 'unicode' problem for now.
Diffstat (limited to '')
| -rw-r--r-- | lib/misc.py | 39 |
1 files changed, 34 insertions, 5 deletions
diff --git a/lib/misc.py b/lib/misc.py index a8070f1..8063ae1 100644 --- a/lib/misc.py +++ b/lib/misc.py | |||
| @@ -1,19 +1,24 @@ | |||
| 1 | import parser | 1 | import parser |
| 2 | import pdfrw | 2 | import pdfrw |
| 3 | import shutil | 3 | import os |
| 4 | import mat | ||
| 5 | import subprocess | ||
| 6 | import tempfile | ||
| 7 | import glob | ||
| 4 | 8 | ||
| 5 | class PdfStripper(parser.Generic_parser): | 9 | class PdfStripper(parser.Generic_parser): |
| 6 | ''' | 10 | ''' |
| 7 | Represent a pdf file, with the help of pdfrw | 11 | Represent a pdf file, with the help of pdfrw |
| 8 | ''' | 12 | ''' |
| 9 | def __init__(self, filename): | 13 | def __init__(self, filename, backup): |
| 10 | self.filename = filename | 14 | self.filename = filename |
| 15 | self.backup = backup | ||
| 11 | self.trailer = pdfrw.PdfReader(self.filename) | 16 | self.trailer = pdfrw.PdfReader(self.filename) |
| 12 | self.writer = pdfrw.PdfWriter() | 17 | self.writer = pdfrw.PdfWriter() |
| 13 | 18 | ||
| 14 | def remove_all(self): | 19 | def remove_all(self): |
| 15 | ''' | 20 | ''' |
| 16 | Remove all the files that are compromizing | 21 | Remove all the meta fields that are compromizing |
| 17 | ''' | 22 | ''' |
| 18 | self.trailer.Info.Title = '' | 23 | self.trailer.Info.Title = '' |
| 19 | self.trailer.Info.Author = '' | 24 | self.trailer.Info.Author = '' |
| @@ -25,9 +30,33 @@ class PdfStripper(parser.Generic_parser): | |||
| 25 | self.writer.trailer = self.trailer | 30 | self.writer.trailer = self.trailer |
| 26 | self.writer.write(self.filename + parser.POSTFIX) | 31 | self.writer.write(self.filename + parser.POSTFIX) |
| 27 | if self.backup is False: | 32 | if self.backup is False: |
| 28 | self.secure_remove() #remove the old file | 33 | mat.secure_remove(self.filename) #remove the old file |
| 29 | shutil.rename(self.filename+ POSTFIX, self.filename)#rename the new | 34 | os.rename(self.filename + parser.POSTFIX, self.filename) |
| 30 | 35 | ||
| 36 | def remove_all_ugly(self): | ||
| 37 | ''' | ||
| 38 | Transform each pages into a jpg, clean them, | ||
| 39 | then re-assemble them into a new pdf | ||
| 40 | ''' | ||
| 41 | output_file = self.filename + parser.POSTFIX + '.pdf' | ||
| 42 | _, self.tmpdir = tempfile.mkstemp() | ||
| 43 | subprocess.call('gm convert %s %s' % (self.filename, self.tmpdir + | ||
| 44 | 'temp.jpg'), shell=True) | ||
| 45 | for current_file in glob.glob(self.tmpdir + 'temp*'): | ||
| 46 | class_file = mat.create_class_file(current_file, False) | ||
| 47 | class_file.remove_all() | ||
| 48 | subprocess.call('gm convert %s %s' % (self.tmpdir + | ||
| 49 | 'temp.jpg*', output_file), shell=True) | ||
| 50 | for current_file in glob.glob(self.tmpdir + 'temp*'): | ||
| 51 | mat.secure_remove(current_file) | ||
| 52 | if self.backup is False: | ||
| 53 | mat.secure_remove(self.filename) #remove the old file | ||
| 54 | os.rename(output_file, self.filename)#rename the new | ||
| 55 | name = self.filename | ||
| 56 | else: | ||
| 57 | name = output_file | ||
| 58 | class_file = mat.create_class_file(name, False) | ||
| 59 | class_file.remove_all() | ||
| 31 | 60 | ||
| 32 | def is_clean(self): | 61 | def is_clean(self): |
| 33 | ''' | 62 | ''' |
