From a6bc786c8728cf77bc6346689aaf6cf678717a58 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sun, 27 Oct 2013 21:44:49 +0000 Subject: Greatly improves torrent handling - Fields are now handled in a recursive way - Use a whitelist instead of a blacklist - Fields are now removed instead of being nullified - The code is more Pythonic --- MAT/misc.py | 78 +++++++++++++++++++++++++----------------- "test/clean \303\251.torrent" | Bin 54609 -> 54565 bytes 2 files changed, 46 insertions(+), 32 deletions(-) diff --git a/MAT/misc.py b/MAT/misc.py index e016d18..b0c22f4 100644 --- a/MAT/misc.py +++ b/MAT/misc.py @@ -1,5 +1,4 @@ -''' - Care about misc formats +''' Care about misc formats ''' import parser @@ -8,55 +7,70 @@ from bencode import bencode class TorrentStripper(parser.GenericParser): - ''' - Represent a torrent file with the help + ''' Represent a torrent file with the help of the bencode lib from Petru Paler ''' def __init__(self, filename, parser, mime, backup, is_writable, **kwargs): super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs) - self.fields = ['comment', 'creation date', 'created by'] + self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces', + 'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash']) - def is_clean(self): + def __get_key_recursively(self, dictionary): + ''' Get recursively all keys from a dict and + its subdicts ''' - Check if the file is clean from harmful metadatas + for (i,j) in dictionary.items(): + if isinstance(j, dict): + return set([i]).union(self.__get_key_recursively(j)) + return set([i]) + + def is_clean(self): + ''' Check if the file is clean from harmful metadata ''' with open(self.filename, 'r') as f: decoded = bencode.bdecode(f.read()) - for key in self.fields: - try: - if decoded[key]: - return False - except KeyError: - pass - return True + return self.fields.issuperset(self.__get_key_recursively(decoded)) - def get_meta(self): 
+ def __get_meta_recursively(self, dictionary): + ''' Get recursively all harmful metadata ''' - Return a dict with all the meta of the file + d = dict() + for(i,j) in dictionary.items(): + if i not in self.fields: + d[i] = j + elif isinstance(j, dict): + d = dict(d.items() + self.__get_meta_recursively(j).items()) + return d + + def get_meta(self): + ''' Return a dict with all the meta of the file ''' - metadata = {} with open(self.filename, 'r') as f: decoded = bencode.bdecode(f.read()) - for key in self.fields: - try: - if decoded[key]: - metadata[key] = decoded[key] - except KeyError: - pass - return metadata + return self.__get_meta_recursively(decoded) - def remove_all(self): + def __remove_all_recursively(self, dictionary): + ''' Remove recursively all compromizing fields ''' - Remove all the files that are compromising + d = dict() + for(i,j) in filter(lambda i: i in self.fields, dictionary.items()): + if isinstance(j, dict): + d = dict(d.items() + self.__get_meta_recursively(j).items()) + else: + d[i] = j + return d + + def remove_all(self): + ''' Remove all comprimizing fields ''' + decoded = '' with open(self.filename, 'r') as f: decoded = bencode.bdecode(f.read()) - for key in self.fields: - try: - decoded[key] = '' - except KeyError: - pass + + cleaned = {i:j for i,j in decoded.items() if i in self.fields} + with open(self.output, 'w') as f: # encode the decoded torrent - f.write(bencode.bencode(decoded)) # and write it in self.output + f.write(bencode.bencode(cleaned)) # and write it in self.output + self.do_backup() return True diff --git "a/test/clean \303\251.torrent" "b/test/clean \303\251.torrent" index e6324f7..428a709 100644 Binary files "a/test/clean \303\251.torrent" and "b/test/clean \303\251.torrent" differ -- cgit v1.3