summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorjvoisin2018-04-22 22:02:00 +0200
committerjvoisin2018-04-22 22:02:00 +0200
commit57bf89e035609c39506372cc9deed92dfbd42716 (patch)
tree69194a04c62af54644dd3df83dfee1db9e265241 /src
parentecb199b4a6a2f54b84237d4f74c145a051c4c08b (diff)
Add support for torrent files cleaning
Diffstat (limited to 'src')
-rw-r--r--src/torrent.py123
1 files changed, 123 insertions, 0 deletions
diff --git a/src/torrent.py b/src/torrent.py
new file mode 100644
index 0000000..df64161
--- /dev/null
+++ b/src/torrent.py
@@ -0,0 +1,123 @@
1import os
2import re
3import shutil
4import tempfile
5import datetime
6import zipfile
7
8from . import abstract, parser_factory
9
10
11
class TorrentParser(abstract.AbstractParser):
    """Inspect and clean the top-level metadata of a bencoded torrent file.

    The file is decoded with a small built-in bencode decoder.  Top-level
    dictionary keys listed in `whitelist` are kept by `remove_all`; every
    other top-level key is reported by `get_meta` and dropped on cleaning.

    `self.filename` and `self.output_filename` are not set in this class;
    they are presumably provided by `abstract.AbstractParser` -- confirm
    against the base class.
    """
    mimetypes = {'application/x-bittorrent', }
    # Top-level keys that survive `remove_all` (bencoded keys are bytes).
    whitelist = {b'announce', b'announce-list', b'info'}

    def __init__(self, filename):
        """Set up the decode/encode dispatch tables for `filename`."""
        super().__init__(filename)
        # Decoding dispatches on the first byte of the remaining input:
        # `l` list, `d` dictionary, `i` integer, a digit starts a string.
        self.__decode_func = {
            ord('l'): self.__decode_list,
            ord('d'): self.__decode_dict,
            ord('i'): self.__decode_int,
        }
        for digit in b'0123456789':  # iterating bytes yields ints
            self.__decode_func[digit] = self.__decode_string

        # Encoding dispatches on the exact Python type of the value.
        self.__encode_func = {
            int: self.__encode_int,
            bytes: self.__encode_string,
            list: self.__encode_list,
            dict: self.__encode_dict,
        }

    def __load_top_level(self):
        """Decode `self.filename` and return its top-level dictionary.

        Returns None (after printing a diagnostic) when the file is not
        valid bencode or when its top-level value is not a dictionary.
        """
        with open(self.filename, 'rb') as f:
            decoded = self.__bdecode(f.read())
        if decoded is None:
            return None  # __bdecode already printed the reason
        if not isinstance(decoded, dict):
            print("not a valid torrent file: top-level value is not a "
                  "dictionary")
            return None
        return decoded

    def get_meta(self):
        """Return the top-level entries whose key is not whitelisted.

        Keys are decoded to `str` (undecodable bytes are replaced instead
        of raising); values are returned as decoded (bytes, int, list or
        dict).  An empty dict is returned when the file cannot be parsed;
        the parse error is printed in that case.
        """
        top_level = self.__load_top_level()
        if top_level is None:
            return {}
        return {
            key.decode('utf-8', errors='replace'): value
            for key, value in top_level.items()
            if key not in self.whitelist
        }

    def remove_all(self):
        """Write a copy holding only whitelisted keys to `output_filename`.

        Returns True on success, False (without writing anything) when the
        input file cannot be parsed.
        """
        top_level = self.__load_top_level()
        if top_level is None:
            return False
        cleaned = {
            key: value
            for key, value in top_level.items()
            if key in self.whitelist
        }
        with open(self.output_filename, 'wb') as f:
            f.write(self.__bencode(cleaned))
        return True

    def __decode_int(self, s):
        """Decode `i<digits>e` at the start of `s`; return (int, rest).

        Raises ValueError on a missing terminator, non-digit content,
        negative zero or a superfluous leading zero.
        """
        s = s[1:]  # skip leading `i`
        end = s.index(b'e')
        literal = s[:end]
        # int() also accepts whitespace, `+` and `_`; restrict the literal
        # to an optional minus sign followed by ASCII digits.
        digits = literal[1:] if literal.startswith(b'-') else literal
        if not digits.isdigit():
            raise ValueError("malformed integer")
        if literal.startswith(b'-0'):
            raise ValueError("negative zero doesn't exist")
        if literal.startswith(b'0') and end != 1:
            raise ValueError("no leading zero except for zero itself")
        return int(literal), s[end + 1:]

    def __decode_string(self, s):
        """Decode `<length>:<bytes>` at the start of `s`; return (bytes, rest).

        Raises ValueError on a missing `:`, a non-digit or zero-padded
        length, or when fewer than `length` bytes follow the `:`.
        """
        colon = s.index(b':')
        length_field = s[:colon]
        if not length_field.isdigit():
            raise ValueError("malformed string length")
        # Indexing bytes yields an int, so compare with startswith().
        if length_field.startswith(b'0') and colon != 1:
            raise ValueError("leading zero in string length")
        start = colon + 1  # first payload byte, just after `:`
        stop = start + int(length_field)
        if stop > len(s):
            raise ValueError("truncated string")
        return s[start:stop], s[stop:]

    def __decode_list(self, s):
        """Decode `l<items>e` at the start of `s`; return (list, rest)."""
        items = []
        s = s[1:]  # skip leading `l`
        while s[0] != ord('e'):
            value, s = self.__decode_func[s[0]](s)
            items.append(value)
        return items, s[1:]  # skip terminal `e`

    def __decode_dict(self, s):
        """Decode `d<key><value>...e` at the start of `s`; return (dict, rest).

        Keys are always decoded as strings; values may be of any type.
        """
        entries = {}
        s = s[1:]  # skip leading `d`
        while s[0] != ord('e'):
            key, s = self.__decode_string(s)
            entries[key], s = self.__decode_func[s[0]](s)
        return entries, s[1:]  # skip terminal `e`

    def __bdecode(self, s):
        """Decode the bencoded bytes `s` and return the resulting value.

        Returns None, after printing a diagnostic, when `s` is malformed
        or when data remains after the first complete value.
        """
        try:
            value, rest = self.__decode_func[s[0]](s)
        # RecursionError: the decoder recurses once per nesting level, so
        # hostile, deeply nested input must not crash the caller.
        except (IndexError, KeyError, ValueError, RecursionError) as e:
            print("not a valid bencoded string: %s" % e)
            return None
        if rest != b'':
            print("invalid bencoded value (data after valid prefix)")
            return None
        return value

    @staticmethod
    def __encode_int(x):
        """Return the bencoding of the integer `x`: `i<x>e`."""
        return b'i' + str(x).encode('ascii') + b'e'

    @staticmethod
    def __encode_string(x: bytes):
        """Return the bencoding of the byte string `x`: `<len>:<x>`."""
        return str(len(x)).encode('ascii') + b':' + x

    def __encode_list(self, x):
        """Return the bencoding of the list `x`: `l<items>e`."""
        body = b''.join(self.__bencode(item) for item in x)
        return b'l' + body + b'e'

    def __encode_dict(self, x):
        """Return the bencoding of the dict `x`, entries sorted by key."""
        parts = []
        for key in sorted(x):
            parts.append(self.__bencode(key))
            parts.append(self.__bencode(x[key]))
        return b'd' + b''.join(parts) + b'e'

    def __bencode(self, x):
        """Bencode `x` (int, bytes, list or dict).

        Raises KeyError for any other type, since dispatch is on the exact
        type of `x`.
        """
        return self.__encode_func[type(x)](x)
122
123