summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjfriedli2019-09-21 05:58:05 -0700
committerjvoisin2019-09-21 05:58:05 -0700
commit70978f7db245e30206486fc4c0605cc992005aac (patch)
treefd7317f4603deac5a3e8440f0c4c0c9a143c1437
parentc96d3b8178a007af747b9a5922e03c2ab2c47a6f (diff)
Api/bulk download for frontend
-rw-r--r--README.md40
-rw-r--r--docker-compose.yml1
-rw-r--r--main.py108
-rw-r--r--requirements.txt3
-rw-r--r--test/test_api.py197
-rw-r--r--utils.py13
6 files changed, 334 insertions, 28 deletions
diff --git a/README.md b/README.md
index 2546c5c..202f307 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,11 @@ Note that you can add multiple hosts from which you want to accept API requests.
46a space. 46a space.
47**IMPORTANT:** The default value if the variable is not set is: `Access-Control-Allow-Origin: *` 47**IMPORTANT:** The default value if the variable is not set is: `Access-Control-Allow-Origin: *`
48 48
49Configure another environment variable: `MAT2_MAX_FILES_BULK_DOWNLOAD=10`
50
51This specifies the maximum number of files that can be bulk downloaded using the API.
52Note: Each file has a maximum size of 16 MB.
53
49Finally, restart uWSGI and your web server: 54Finally, restart uWSGI and your web server:
50 55
51``` 56```
@@ -149,6 +154,41 @@ The `file` parameter is the base64 encoded file which will be cleaned.
149] 154]
150``` 155```
151 156
157**Endpoint:** `/api/download/bulk`
158
159This endpoint allows you to bulk download several files
160which you uploaded beforehand. Note that the `download_list`
161MUST contain at least two files. The max length is configurable
162(default is 10).
163
164**HTTP Verbs:** POST
165
166**Body:**
167```json
168{
169 "download_list": [
170 {
171 "file_name": "uploaded_file_name.jpg",
172 "key": "uploaded_file_key"
173 }
174 ]
175}
176```
177
178The `file_name` parameter is the `output_filename` returned when the file was previously uploaded.
179The `key` parameter is the `key` returned by that same upload.
180
181**Example Response:**
182```json
183{
184 "output_filename": "files.2cd225d5-2d75-44a2-9f26-e120a87e4279.cleaned.zip",
185 "mime": "application/zip",
186 "key": "5ee4cf8821226340d3d5ed16bd2e1b435234a9ad218f282b489a85d116e7a4c4",
187 "meta_after": {},
188 "download_link": "http://localhost/api/download/5ee4cf8821226340d3d5ed16bd2e1b435234a9ad218f282b489a85d116e7a4c4/files.2cd225d5-2d75-44a2-9f26-e120a87e4279.cleaned.zip"
189}
190```
191
152# Docker 192# Docker
153There are two Dockerfiles present in this repository. The file called `Dockerfile.development` is used for development 193There are two Dockerfiles present in this repository. The file called `Dockerfile.development` is used for development
154and `Dockerfile.production` is used for production deployments. 194and `Dockerfile.production` is used for production deployments.
diff --git a/docker-compose.yml b/docker-compose.yml
index e758801..e925447 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,6 +8,7 @@ services:
8 - FLASK_APP=main.py 8 - FLASK_APP=main.py
9 - FLASK_ENV=development 9 - FLASK_ENV=development
10 - MAT2_ALLOW_ORIGIN_WHITELIST=* 10 - MAT2_ALLOW_ORIGIN_WHITELIST=*
11 - MAT2_MAX_FILES_BULK_DOWNLOAD=10
11 ports: 12 ports:
12 - "5000:5000" 13 - "5000:5000"
13 volumes: 14 volumes:
diff --git a/main.py b/main.py
index b059bfe..9349ec1 100644
--- a/main.py
+++ b/main.py
@@ -1,12 +1,15 @@
1import os 1import os
2import hmac 2import hmac
3import mimetypes as mtype 3import mimetypes as mtype
4from uuid import uuid4
4import jinja2 5import jinja2
5import base64 6import base64
6import io 7import io
7import binascii 8import binascii
8import utils 9import zipfile
9 10
11from cerberus import Validator
12import utils
10from libmat2 import parser_factory 13from libmat2 import parser_factory
11from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request 14from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request
12from flask_restful import Resource, Api, reqparse, abort 15from flask_restful import Resource, Api, reqparse, abort
@@ -119,6 +122,19 @@ def create_app(test_config=None):
119 complete_path = os.path.join(app.config['UPLOAD_FOLDER'], filepath) 122 complete_path = os.path.join(app.config['UPLOAD_FOLDER'], filepath)
120 return complete_path, filepath 123 return complete_path, filepath
121 124
125 def is_valid_api_download_file(filename, key):
126 if filename != secure_filename(filename):
127 abort(400, message='Insecure filename')
128
129 complete_path, filepath = get_file_paths(filename)
130
131 if not os.path.exists(complete_path):
132 abort(404, message='File not found')
133
134 if hmac.compare_digest(utils.hash_file(complete_path), key) is False:
135 abort(400, message='The file hash does not match')
136 return complete_path, filepath
137
122 class APIUpload(Resource): 138 class APIUpload(Resource):
123 139
124 def post(self): 140 def post(self):
@@ -145,30 +161,18 @@ def create_app(test_config=None):
145 abort(500, message='Unable to clean %s' % mime) 161 abort(500, message='Unable to clean %s' % mime)
146 162
147 key, meta_after, output_filename = cleanup(parser, filepath) 163 key, meta_after, output_filename = cleanup(parser, filepath)
148 return { 164 return utils.return_file_created_response(
149 'output_filename': output_filename, 165 output_filename,
150 'mime': mime, 166 mime,
151 'key': key, 167 key,
152 'meta': meta, 168 meta,
153 'meta_after': meta_after, 169 meta_after,
154 'download_link': urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename)) 170 urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename))
155 } 171 )
156 172
157 class APIDownload(Resource): 173 class APIDownload(Resource):
158 def get(self, key: str, filename: str): 174 def get(self, key: str, filename: str):
159 175 complete_path, filepath = is_valid_api_download_file(filename, key)
160 if filename != secure_filename(filename):
161 abort(400, message='Insecure filename')
162
163 complete_path, filepath = get_file_paths(filename)
164
165 if not os.path.exists(complete_path):
166 abort(404, message='File not found')
167 return redirect(url_for('upload_file'))
168
169 if hmac.compare_digest(utils.hash_file(complete_path), key) is False:
170 abort(400, message='The file hash does not match')
171 return redirect(url_for('upload_file'))
172 176
173 @after_this_request 177 @after_this_request
174 def remove_file(response): 178 def remove_file(response):
@@ -177,16 +181,72 @@ def create_app(test_config=None):
177 181
178 return send_from_directory(app.config['UPLOAD_FOLDER'], filepath) 182 return send_from_directory(app.config['UPLOAD_FOLDER'], filepath)
179 183
180 class APIMSupportedExtensions(Resource): 184 class APIBulkDownloadCreator(Resource):
185 schema = {
186 'download_list': {
187 'type': 'list',
188 'minlength': 2,
189 'maxlength': int(os.environ.get('MAT2_MAX_FILES_BULK_DOWNLOAD', 10)),
190 'schema': {
191 'type': 'dict',
192 'schema': {
193 'key': {'type': 'string'},
194 'file_name': {'type': 'string'}
195 }
196 }
197 }
198 }
199 v = Validator(schema)
200
201 def post(self):
202 utils.check_upload_folder(app.config['UPLOAD_FOLDER'])
203 data = request.json
204 if not self.v.validate(data):
205 abort(400, message=self.v.errors)
206 # prevent the zip file from being overwritten
207 zip_filename = 'files.' + str(uuid4()) + '.zip'
208 zip_path = os.path.join(app.config['UPLOAD_FOLDER'], zip_filename)
209 cleaned_files_zip = zipfile.ZipFile(zip_path, 'w')
210 with cleaned_files_zip:
211 for file_candidate in data['download_list']:
212 complete_path, file_path = is_valid_api_download_file(
213 file_candidate['file_name'],
214 file_candidate['key']
215 )
216 try:
217 cleaned_files_zip.write(complete_path)
218 except ValueError:
219 abort(400, message='Creating the archive failed')
220
221 try:
222 cleaned_files_zip.testzip()
223 except ValueError as e:
224 abort(400, message=str(e))
225
226 parser, mime = get_file_parser(zip_path)
227 if not parser.remove_all():
228 abort(500, message='Unable to clean %s' % mime)
229 key, meta_after, output_filename = cleanup(parser, zip_path)
230 return {
231 'output_filename': output_filename,
232 'mime': mime,
233 'key': key,
234 'meta_after': meta_after,
235 'download_link': urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename))
236 }, 201
237
238 class APISupportedExtensions(Resource):
181 def get(self): 239 def get(self):
182 return get_supported_extensions() 240 return get_supported_extensions()
183 241
184 api.add_resource(APIUpload, '/api/upload') 242 api.add_resource(APIUpload, '/api/upload')
185 api.add_resource(APIDownload, '/api/download/<string:key>/<string:filename>') 243 api.add_resource(APIDownload, '/api/download/<string:key>/<string:filename>')
186 api.add_resource(APIMSupportedExtensions, '/api/extension') 244 api.add_resource(APIBulkDownloadCreator, '/api/download/bulk')
245 api.add_resource(APISupportedExtensions, '/api/extension')
187 246
188 return app 247 return app
189 248
249
190app = create_app() 250app = create_app()
191 251
192if __name__ == '__main__': # pragma: no cover 252if __name__ == '__main__': # pragma: no cover
diff --git a/requirements.txt b/requirements.txt
index 8796aaa..42b75e2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ bubblewrap==1.2.0
4mat2==0.9.0 4mat2==0.9.0
5flask==1.0.3 5flask==1.0.3
6Flask-RESTful==0.3.7 6Flask-RESTful==0.3.7
7Flask-Cors==3.0.8 \ No newline at end of file 7Flask-Cors==3.0.8
8Cerberus==1.3.1 \ No newline at end of file
diff --git a/test/test_api.py b/test/test_api.py
index de33355..b4cbd57 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -1,8 +1,8 @@
1import unittest 1import unittest
2import tempfile 2import tempfile
3import shutil
4import json 3import json
5import os 4import os
5import shutil
6 6
7import main 7import main
8 8
@@ -35,7 +35,7 @@ class Mat2APITestCase(unittest.TestCase):
35 self.assertEqual(request.status_code, 200) 35 self.assertEqual(request.status_code, 200)
36 36
37 data = json.loads(request.data.decode('utf-8')) 37 data = json.loads(request.data.decode('utf-8'))
38 expected = { 38 expected = {
39 'output_filename': 'test_name.cleaned.jpg', 39 'output_filename': 'test_name.cleaned.jpg',
40 'mime': 'image/jpeg', 40 'mime': 'image/jpeg',
41 'key': '81a541f9ebc0233d419d25ed39908b16f82be26a783f32d56c381559e84e6161', 41 'key': '81a541f9ebc0233d419d25ed39908b16f82be26a783f32d56c381559e84e6161',
@@ -151,6 +151,199 @@ class Mat2APITestCase(unittest.TestCase):
151 request = self.app.get(data['download_link']) 151 request = self.app.get(data['download_link'])
152 self.assertEqual(request.status_code, 200) 152 self.assertEqual(request.status_code, 200)
153 153
154 request = self.app.get(data['download_link'])
155 self.assertEqual(request.status_code, 404)
156
157 def test_api_bulk_download(self):
158 request = self.app.post('/api/upload',
159 data='{"file_name": "test_name.jpg", '
160 '"file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAf'
161 'FcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="}',
162 headers={'content-type': 'application/json'}
163 )
164 self.assertEqual(request.status_code, 200)
165 upload_one = json.loads(request.data.decode('utf-8'))
166
167 request = self.app.post('/api/upload',
168 data='{"file_name": "test_name_two.jpg", '
169 '"file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42'
170 'mO0vqpQDwAENAGxOnU0jQAAAABJRU5ErkJggg=="}',
171 headers={'content-type': 'application/json'}
172 )
173 self.assertEqual(request.status_code, 200)
174 upload_two = json.loads(request.data.decode('utf-8'))
175
176 post_body = {
177 u'download_list': [
178 {
179 u'file_name': upload_one['output_filename'],
180 u'key': upload_one['key']
181 },
182 {
183 u'file_name': upload_two['output_filename'],
184 u'key': upload_two['key']
185 }
186 ]
187 }
188 request = self.app.post('/api/download/bulk',
189 data=json.dumps(post_body),
190 headers={'content-type': 'application/json'}
191 )
192
193 response = json.loads(request.data.decode('utf-8'))
194 self.assertEqual(request.status_code, 201)
195
196 self.assertIn(
197 "http://localhost/api/download/",
198 response['download_link']
199 )
200 self.assertIn(
201 ".cleaned.zip",
202 response['download_link']
203 )
204
205 self.assertIn('files.', response['output_filename'])
206 self.assertIn('cleaned.zip', response['output_filename'])
207 self.assertIn(response['mime'], 'application/zip')
208 self.assertEqual(response['meta_after'], {})
209
210 request = self.app.get(response['download_link'])
211 self.assertEqual(request.status_code, 200)
212
213 request = self.app.get(response['download_link'])
214 self.assertEqual(request.status_code, 404)
215
216 def test_api_bulk_download_validation(self):
217 post_body = {
218 u'download_list': [
219 {
220 u'file_name': 'invalid_file_name',
221 u'key': 'invalid_key'
222 }
223 ]
224 }
225 request = self.app.post('/api/download/bulk',
226 data=json.dumps(post_body),
227 headers={'content-type': 'application/json'}
228 )
229
230 response = json.loads(request.data.decode('utf-8'))
231 self.assertEqual(response['message']['download_list'][0], 'min length is 2')
232 self.assertEqual(request.status_code, 400)
233
234 post_body = {
235 u'download_list': [
236 {
237 u'file_name': 'test.jpg',
238 u'key': 'key'
239 },
240 {
241 u'file_name': 'test.jpg',
242 u'key': 'key'
243 },
244 {
245 u'file_name': 'test.jpg',
246 u'key': 'key'
247 },
248 {
249 u'file_name': 'test.jpg',
250 u'key': 'key'
251 },
252 {
253 u'file_name': 'test.jpg',
254 u'key': 'key'
255 },
256 {
257 u'file_name': 'test.jpg',
258 u'key': 'key'
259 },
260 {
261 u'file_name': 'test.jpg',
262 u'key': 'key'
263 },
264 {
265 u'file_name': 'test.jpg',
266 u'key': 'key'
267 },
268 {
269 u'file_name': 'test.jpg',
270 u'key': 'key'
271 },
272 {
273 u'file_name': 'test.jpg',
274 u'key': 'key'
275 },
276 {
277 u'file_name': 'test.jpg',
278 u'key': 'key'
279 }
280 ]
281 }
282 request = self.app.post('/api/download/bulk',
283 data=json.dumps(post_body),
284 headers={'content-type': 'application/json'}
285 )
286
287 response = json.loads(request.data.decode('utf-8'))
288 self.assertEqual(response['message']['download_list'][0], 'max length is 10')
289 self.assertEqual(request.status_code, 400)
290
291 post_body = {
292 u'download_list': [
293 {
294 u'file_name_x': 'invalid_file_name',
295 u'key_x': 'invalid_key'
296 },
297 {
298 u'file_name_x': 'invalid_file_name',
299 u'key_x': 'invalid_key'
300 }
301 ]
302 }
303 request = self.app.post('/api/download/bulk',
304 data=json.dumps(post_body),
305 headers={'content-type': 'application/json'}
306 )
307
308 response = json.loads(request.data.decode('utf-8'))
309 expected = {
310 'message': {
311 'download_list': [
312 {
313 '0': [{
314 'file_name_x': ['unknown field'],
315 'key_x': ['unknown field']
316 }],
317 '1': [{
318 'file_name_x': ['unknown field'],
319 'key_x': ['unknown field']
320 }]
321 }
322 ]
323 }
324 }
325 self.assertEqual(response, expected)
326 self.assertEqual(request.status_code, 400)
327
328 post_body = {
329 u'download_list': [
330 {
331 u'file_name': 'invalid_file_name1',
332 u'key': 'invalid_key1'
333 },
334 {
335 u'file_name': 'invalid_file_name2',
336 u'key': 'invalid_key2'
337 }
338 ]
339 }
340 request = self.app.post('/api/download/bulk',
341 data=json.dumps(post_body),
342 headers={'content-type': 'application/json'}
343 )
344 response = json.loads(request.data.decode('utf-8'))
345 self.assertEqual('File not found', response['message'])
346
154 347
155if __name__ == '__main__': 348if __name__ == '__main__':
156 unittest.main() 349 unittest.main()
diff --git a/utils.py b/utils.py
index fb2fb08..023c838 100644
--- a/utils.py
+++ b/utils.py
@@ -19,4 +19,15 @@ def hash_file(filepath: str) -> str:
19 19
20def check_upload_folder(upload_folder): 20def check_upload_folder(upload_folder):
21 if not os.path.exists(upload_folder): 21 if not os.path.exists(upload_folder):
22 os.mkdir(upload_folder) \ No newline at end of file 22 os.mkdir(upload_folder)
23
24
25def return_file_created_response(output_filename, mime, key, meta, meta_after, download_link):
26 return {
27 'output_filename': output_filename,
28 'mime': mime,
29 'key': key,
30 'meta': meta,
31 'meta_after': meta_after,
32 'download_link': download_link
33 }