From 70978f7db245e30206486fc4c0605cc992005aac Mon Sep 17 00:00:00 2001 From: jfriedli Date: Sat, 21 Sep 2019 05:58:05 -0700 Subject: Api/bulk download for frontend --- README.md | 40 +++++++++++ docker-compose.yml | 1 + main.py | 108 ++++++++++++++++++++++------- requirements.txt | 3 +- test/test_api.py | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++- utils.py | 13 +++- 6 files changed, 334 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 2546c5c..202f307 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,11 @@ Note that you can add multiple hosts from which you want to accept API requests. a space. **IMPORTANT:** The default value if the variable is not set is: `Access-Control-Allow-Origin: *` +Configure another environment variable: `MAT2_MAX_FILES_BULK_DOWNLOAD=10` + +This specifies the max number of files that can be bulk downloaded using the API. +Note: Each file has a max file size of 16 MB + Finally, restart uWSGI and your web server: ``` @@ -149,6 +154,41 @@ The `file` parameter is the base64 encoded file which will be cleaned. ] ``` +**Endpoint:** `/api/download/bulk` + +This endpoint allows you to bulk download several files +which you uploaded beforehand. Note that the `download_list` +MUST contain at least two files. The max length is configurable +(default is 10). + +**HTTP Verbs:** POST + +**Body:** +```json +{ + "download_list": [ + { + "file_name": "uploaded_file_name.jpg", + "key": "uploaded_file_key" + } + ] +} +``` + +The `file_name` parameter takes the file name from a previously uploaded file. +The `key` parameter is the key from a previously uploaded file. 
+ +**Example Response:** +```json +{ + "output_filename": "files.2cd225d5-2d75-44a2-9f26-e120a87e4279.cleaned.zip", + "mime": "application/zip", + "key": "5ee4cf8821226340d3d5ed16bd2e1b435234a9ad218f282b489a85d116e7a4c4", + "meta_after": {}, + "download_link": "http://localhost/api/download/5ee4cf8821226340d3d5ed16bd2e1b435234a9ad218f282b489a85d116e7a4c4/files.2cd225d5-2d75-44a2-9f26-e120a87e4279.cleaned.zip" +} +``` + # Docker There are two Dockerfiles present in this repository. The file called `Dockerfile.development` is used for development and `Dockerfile.production` is used for production deployments. diff --git a/docker-compose.yml b/docker-compose.yml index e758801..e925447 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,6 +8,7 @@ services: - FLASK_APP=main.py - FLASK_ENV=development - MAT2_ALLOW_ORIGIN_WHITELIST=* + - MAT2_MAX_FILES_BULK_DOWNLOAD=10 ports: - "5000:5000" volumes: diff --git a/main.py b/main.py index b059bfe..9349ec1 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,15 @@ import os import hmac import mimetypes as mtype +from uuid import uuid4 import jinja2 import base64 import io import binascii -import utils +import zipfile +from cerberus import Validator +import utils from libmat2 import parser_factory from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request from flask_restful import Resource, Api, reqparse, abort @@ -119,6 +122,19 @@ def create_app(test_config=None): complete_path = os.path.join(app.config['UPLOAD_FOLDER'], filepath) return complete_path, filepath + def is_valid_api_download_file(filename, key): + if filename != secure_filename(filename): + abort(400, message='Insecure filename') + + complete_path, filepath = get_file_paths(filename) + + if not os.path.exists(complete_path): + abort(404, message='File not found') + + if hmac.compare_digest(utils.hash_file(complete_path), key) is False: + abort(400, message='The file hash does not match') + return 
complete_path, filepath + class APIUpload(Resource): def post(self): @@ -145,30 +161,18 @@ def create_app(test_config=None): abort(500, message='Unable to clean %s' % mime) key, meta_after, output_filename = cleanup(parser, filepath) - return { - 'output_filename': output_filename, - 'mime': mime, - 'key': key, - 'meta': meta, - 'meta_after': meta_after, - 'download_link': urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename)) - } + return utils.return_file_created_response( + output_filename, + mime, + key, + meta, + meta_after, + urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename)) + ) class APIDownload(Resource): def get(self, key: str, filename: str): - - if filename != secure_filename(filename): - abort(400, message='Insecure filename') - - complete_path, filepath = get_file_paths(filename) - - if not os.path.exists(complete_path): - abort(404, message='File not found') - return redirect(url_for('upload_file')) - - if hmac.compare_digest(utils.hash_file(complete_path), key) is False: - abort(400, message='The file hash does not match') - return redirect(url_for('upload_file')) + complete_path, filepath = is_valid_api_download_file(filename, key) @after_this_request def remove_file(response): @@ -177,16 +181,72 @@ def create_app(test_config=None): return send_from_directory(app.config['UPLOAD_FOLDER'], filepath) - class APIMSupportedExtensions(Resource): + class APIBulkDownloadCreator(Resource): + schema = { + 'download_list': { + 'type': 'list', + 'minlength': 2, + 'maxlength': int(os.environ.get('MAT2_MAX_FILES_BULK_DOWNLOAD', 10)), + 'schema': { + 'type': 'dict', + 'schema': { + 'key': {'type': 'string'}, + 'file_name': {'type': 'string'} + } + } + } + } + v = Validator(schema) + + def post(self): + utils.check_upload_folder(app.config['UPLOAD_FOLDER']) + data = request.json + if not self.v.validate(data): + abort(400, message=self.v.errors) + # prevent the zip file from being overwritten + 
zip_filename = 'files.' + str(uuid4()) + '.zip' + zip_path = os.path.join(app.config['UPLOAD_FOLDER'], zip_filename) + cleaned_files_zip = zipfile.ZipFile(zip_path, 'w') + with cleaned_files_zip: + for file_candidate in data['download_list']: + complete_path, file_path = is_valid_api_download_file( + file_candidate['file_name'], + file_candidate['key'] + ) + try: + cleaned_files_zip.write(complete_path) + except ValueError: + abort(400, message='Creating the archive failed') + + try: + cleaned_files_zip.testzip() + except ValueError as e: + abort(400, message=str(e)) + + parser, mime = get_file_parser(zip_path) + if not parser.remove_all(): + abort(500, message='Unable to clean %s' % mime) + key, meta_after, output_filename = cleanup(parser, zip_path) + return { + 'output_filename': output_filename, + 'mime': mime, + 'key': key, + 'meta_after': meta_after, + 'download_link': urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename)) + }, 201 + + class APISupportedExtensions(Resource): def get(self): return get_supported_extensions() api.add_resource(APIUpload, '/api/upload') api.add_resource(APIDownload, '/api/download//') - api.add_resource(APIMSupportedExtensions, '/api/extension') + api.add_resource(APIBulkDownloadCreator, '/api/download/bulk') + api.add_resource(APISupportedExtensions, '/api/extension') return app + app = create_app() if __name__ == '__main__': # pragma: no cover diff --git a/requirements.txt b/requirements.txt index 8796aaa..42b75e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ bubblewrap==1.2.0 mat2==0.9.0 flask==1.0.3 Flask-RESTful==0.3.7 -Flask-Cors==3.0.8 \ No newline at end of file +Flask-Cors==3.0.8 +Cerberus==1.3.1 \ No newline at end of file diff --git a/test/test_api.py b/test/test_api.py index de33355..b4cbd57 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,8 +1,8 @@ import unittest import tempfile -import shutil import json import os +import shutil import main @@ -35,7 
+35,7 @@ class Mat2APITestCase(unittest.TestCase): self.assertEqual(request.status_code, 200) data = json.loads(request.data.decode('utf-8')) - expected = { + expected = { 'output_filename': 'test_name.cleaned.jpg', 'mime': 'image/jpeg', 'key': '81a541f9ebc0233d419d25ed39908b16f82be26a783f32d56c381559e84e6161', @@ -151,6 +151,199 @@ class Mat2APITestCase(unittest.TestCase): request = self.app.get(data['download_link']) self.assertEqual(request.status_code, 200) + request = self.app.get(data['download_link']) + self.assertEqual(request.status_code, 404) + + def test_api_bulk_download(self): + request = self.app.post('/api/upload', + data='{"file_name": "test_name.jpg", ' + '"file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAf' + 'FcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="}', + headers={'content-type': 'application/json'} + ) + self.assertEqual(request.status_code, 200) + upload_one = json.loads(request.data.decode('utf-8')) + + request = self.app.post('/api/upload', + data='{"file_name": "test_name_two.jpg", ' + '"file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42' + 'mO0vqpQDwAENAGxOnU0jQAAAABJRU5ErkJggg=="}', + headers={'content-type': 'application/json'} + ) + self.assertEqual(request.status_code, 200) + upload_two = json.loads(request.data.decode('utf-8')) + + post_body = { + u'download_list': [ + { + u'file_name': upload_one['output_filename'], + u'key': upload_one['key'] + }, + { + u'file_name': upload_two['output_filename'], + u'key': upload_two['key'] + } + ] + } + request = self.app.post('/api/download/bulk', + data=json.dumps(post_body), + headers={'content-type': 'application/json'} + ) + + response = json.loads(request.data.decode('utf-8')) + self.assertEqual(request.status_code, 201) + + self.assertIn( + "http://localhost/api/download/", + response['download_link'] + ) + self.assertIn( + ".cleaned.zip", + response['download_link'] + ) + + self.assertIn('files.', response['output_filename']) + self.assertIn('cleaned.zip', 
response['output_filename']) + self.assertIn(response['mime'], 'application/zip') + self.assertEqual(response['meta_after'], {}) + + request = self.app.get(response['download_link']) + self.assertEqual(request.status_code, 200) + + request = self.app.get(response['download_link']) + self.assertEqual(request.status_code, 404) + + def test_api_bulk_download_validation(self): + post_body = { + u'download_list': [ + { + u'file_name': 'invalid_file_name', + u'key': 'invalid_key' + } + ] + } + request = self.app.post('/api/download/bulk', + data=json.dumps(post_body), + headers={'content-type': 'application/json'} + ) + + response = json.loads(request.data.decode('utf-8')) + self.assertEqual(response['message']['download_list'][0], 'min length is 2') + self.assertEqual(request.status_code, 400) + + post_body = { + u'download_list': [ + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + }, + { + u'file_name': 'test.jpg', + u'key': 'key' + } + ] + } + request = self.app.post('/api/download/bulk', + data=json.dumps(post_body), + headers={'content-type': 'application/json'} + ) + + response = json.loads(request.data.decode('utf-8')) + self.assertEqual(response['message']['download_list'][0], 'max length is 10') + self.assertEqual(request.status_code, 400) + + post_body = { + u'download_list': [ + { + u'file_name_x': 'invalid_file_name', + u'key_x': 'invalid_key' + }, + { + u'file_name_x': 'invalid_file_name', + u'key_x': 'invalid_key' + } + ] + } + request = self.app.post('/api/download/bulk', + 
data=json.dumps(post_body), + headers={'content-type': 'application/json'} + ) + + response = json.loads(request.data.decode('utf-8')) + expected = { + 'message': { + 'download_list': [ + { + '0': [{ + 'file_name_x': ['unknown field'], + 'key_x': ['unknown field'] + }], + '1': [{ + 'file_name_x': ['unknown field'], + 'key_x': ['unknown field'] + }] + } + ] + } + } + self.assertEqual(response, expected) + self.assertEqual(request.status_code, 400) + + post_body = { + u'download_list': [ + { + u'file_name': 'invalid_file_name1', + u'key': 'invalid_key1' + }, + { + u'file_name': 'invalid_file_name2', + u'key': 'invalid_key2' + } + ] + } + request = self.app.post('/api/download/bulk', + data=json.dumps(post_body), + headers={'content-type': 'application/json'} + ) + response = json.loads(request.data.decode('utf-8')) + self.assertEqual('File not found', response['message']) + if __name__ == '__main__': unittest.main() diff --git a/utils.py b/utils.py index fb2fb08..023c838 100644 --- a/utils.py +++ b/utils.py @@ -19,4 +19,15 @@ def hash_file(filepath: str) -> str: def check_upload_folder(upload_folder): if not os.path.exists(upload_folder): - os.mkdir(upload_folder) \ No newline at end of file + os.mkdir(upload_folder) + + +def return_file_created_response(output_filename, mime, key, meta, meta_after, download_link): + return { + 'output_filename': output_filename, + 'mime': mime, + 'key': key, + 'meta': meta, + 'meta_after': meta_after, + 'download_link': download_link + } -- cgit v1.3