diff options
| author | jfriedli | 2020-04-23 10:39:35 -0700 |
|---|---|---|
| committer | jfriedli | 2020-04-23 10:39:35 -0700 |
| commit | e1bac8b6a7fd857f38b7bcb678398c82baaa8fd5 (patch) | |
| tree | fa87e526289e455f2f17b86973d08eb6850e721f /matweb | |
| parent | d14988fa3fa97f549fb8eaf601cb2c687cdce143 (diff) | |
Refactoring
Diffstat (limited to 'matweb')
| -rw-r--r-- | matweb/file_removal_scheduler.py | 26 | ||||
| -rw-r--r-- | matweb/frontend.py | 77 | ||||
| -rw-r--r-- | matweb/rest_api.py | 139 | ||||
| -rw-r--r-- | matweb/utils.py | 91 |
4 files changed, 333 insertions, 0 deletions
diff --git a/matweb/file_removal_scheduler.py b/matweb/file_removal_scheduler.py new file mode 100644 index 0000000..2ce7912 --- /dev/null +++ b/matweb/file_removal_scheduler.py | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | import glob | ||
| 2 | import time | ||
| 3 | import sys | ||
| 4 | import os | ||
| 5 | import random | ||
| 6 | |||
| 7 | |||
def run_file_removal_job(upload_folder_path):
    """Occasionally sweep the upload folder and purge files that are too old.

    The sweep only happens when ``random.randint(0, 10)`` yields 0, so most
    calls return immediately without touching the file system.

    :param upload_folder_path: directory whose direct children are examined.
    """
    sweep_now = random.randint(0, 10) == 0
    if not sweep_now:
        return

    for candidate in glob.glob(upload_folder_path + '/*'):
        delete_file_when_too_old(candidate)
| 12 | |||
| 13 | |||
def delete_file_when_too_old(filepath):
    """Delete ``filepath`` when it was last modified too long ago.

    The maximum age in seconds is read from the environment variable
    ``MAT2_MAX_FILE_AGE_FOR_REMOVAL`` and defaults to 15 minutes.

    A file that has vanished in the meantime is skipped silently: the
    download handlers remove files on their own after a response, so a
    missing file simply means the cleanup has already happened. Any other
    removal failure is reported on stdout and terminates the process with
    exit status 1.

    :param filepath: path of the file to examine.
    """
    try:
        file_mod_time = os.stat(filepath).st_mtime
    except FileNotFoundError:
        # Removed between the directory listing and this call.
        return

    # time in seconds since the last modification of the file
    age = time.time() - file_mod_time

    max_age = int(os.environ.get('MAT2_MAX_FILE_AGE_FOR_REMOVAL', 15 * 60))
    if age <= max_age:
        return

    try:
        os.remove(filepath)
    except FileNotFoundError:
        # Somebody else deleted it first; nothing left to do.
        return
    except OSError:
        print('Automatic File Removal failed on file: ' + str(filepath))
        sys.exit(1)
diff --git a/matweb/frontend.py b/matweb/frontend.py new file mode 100644 index 0000000..93432b4 --- /dev/null +++ b/matweb/frontend.py | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | import hmac | ||
| 2 | import os | ||
| 3 | |||
| 4 | from flask import Blueprint, render_template, url_for, current_app, after_this_request, send_from_directory, request, \ | ||
| 5 | flash | ||
| 6 | from werkzeug.utils import secure_filename, redirect | ||
| 7 | |||
| 8 | from matweb import file_removal_scheduler, utils | ||
| 9 | |||
| 10 | routes = Blueprint('routes', __name__) | ||
| 11 | |||
| 12 | |||
@routes.route('/info')
def info():
    """Render the info page with the list of supported file extensions."""
    # The extension list is computed once; the previous extra call only
    # threw its result away.
    return render_template(
        'info.html', extensions=utils.get_supported_extensions()
    )
| 19 | |||
| 20 | |||
@routes.route('/download/<string:key>/<string:filename>')
def download_file(key: str, filename: str):
    """Send a cleaned file to the client and delete it afterwards.

    The request is redirected to the upload page when the filename is not
    already in its ``secure_filename`` form, when the file does not exist,
    or when ``key`` does not equal the SHA-256 hex digest of the file.

    :param key: hex digest the client received when the file was cleaned.
    :param filename: name of the file inside the upload folder.
    """
    if filename != secure_filename(filename):
        return redirect(url_for('routes.upload_file'))

    upload_folder = current_app.config['UPLOAD_FOLDER']
    complete_path, filepath = utils.get_file_paths(filename, upload_folder)
    file_removal_scheduler.run_file_removal_job(upload_folder)

    if not os.path.exists(complete_path):
        return redirect(url_for('routes.upload_file'))

    # compare_digest() raises TypeError for str arguments that contain
    # non-ASCII characters, and `key` comes straight from the URL. Comparing
    # bytes keeps the constant-time check and turns such keys into a plain
    # mismatch instead of a server error.
    expected_key = utils.hash_file(complete_path).encode('utf-8')
    if not hmac.compare_digest(expected_key, key.encode('utf-8', 'replace')):
        return redirect(url_for('routes.upload_file'))

    @after_this_request
    def remove_file(response):
        # The file may already be gone (e.g. removed by a parallel request).
        try:
            os.remove(complete_path)
        except FileNotFoundError:
            pass
        return response

    return send_from_directory(upload_folder, filepath, as_attachment=True)
| 40 | |||
| 41 | |||
@routes.route('/', methods=['GET', 'POST'])
def upload_file():
    """Show the upload form (GET) or clean an uploaded file (POST).

    On POST the uploaded file is stored in the upload folder, its metadata
    is read, the file is cleaned and the download page is rendered. Invalid
    requests flash a message and redirect.
    """
    upload_folder = current_app.config['UPLOAD_FOLDER']
    utils.check_upload_folder(upload_folder)
    mime_types = utils.get_supported_extensions()

    if request.method != 'POST':
        # Plain page view: expose the upload limit in whole megabytes.
        max_file_size = int(current_app.config['MAX_CONTENT_LENGTH'] / 1024 / 1024)
        return render_template('index.html', max_file_size=max_file_size, mimetypes=mime_types)

    # The POST request must carry a file part ...
    if 'file' not in request.files:
        flash('No file part')
        return redirect(request.url)

    # ... and that part must have a filename.
    uploaded_file = request.files['file']
    if not uploaded_file.filename:
        flash('No selected file')
        return redirect(request.url)

    _, stored_path = utils.save_file(uploaded_file, upload_folder)
    parser, mime = utils.get_file_parser(stored_path)
    if parser is None:
        flash('The type %s is not supported' % mime)
        return redirect(url_for('routes.upload_file'))

    # Metadata must be read before the file is cleaned.
    meta = parser.get_meta()
    if parser.remove_all() is not True:
        flash('Unable to clean %s' % mime)
        return redirect(url_for('routes.upload_file'))

    key, meta_after, output_filename = utils.cleanup(parser, stored_path, upload_folder)
    return render_template(
        'download.html',
        mimetypes=mime_types,
        meta=meta,
        filename=output_filename,
        meta_after=meta_after,
        key=key,
    )
diff --git a/matweb/rest_api.py b/matweb/rest_api.py new file mode 100644 index 0000000..60d834f --- /dev/null +++ b/matweb/rest_api.py | |||
| @@ -0,0 +1,139 @@ | |||
| 1 | import os | ||
| 2 | import base64 | ||
| 3 | import io | ||
| 4 | import binascii | ||
| 5 | import zipfile | ||
| 6 | from uuid import uuid4 | ||
| 7 | |||
| 8 | from flask import after_this_request, send_from_directory | ||
| 9 | from flask_restful import Resource, reqparse, abort, request | ||
| 10 | from cerberus import Validator | ||
| 11 | from werkzeug.datastructures import FileStorage | ||
| 12 | from urllib.parse import urljoin | ||
| 13 | |||
| 14 | from matweb import file_removal_scheduler, utils | ||
| 15 | |||
| 16 | |||
class APIUpload(Resource):
    """REST resource that receives a base64 encoded file and cleans it."""

    def __init__(self, **kwargs):
        # Folder in which uploaded and cleaned files are stored.
        self.upload_folder = kwargs['upload_folder']

    def post(self):
        """Clean the posted file and describe the result.

        Expects the parameters ``file_name`` and ``file`` (base64 data).
        Aborts with 400 on undecodable data, 415 when no parser handles the
        file and 500 when cleaning fails.
        """
        utils.check_upload_folder(self.upload_folder)

        req_parser = reqparse.RequestParser()
        req_parser.add_argument(
            'file_name', type=str, required=True, help='Post parameter is not specified: file_name'
        )
        req_parser.add_argument(
            'file', type=str, required=True, help='Post parameter is not specified: file'
        )
        args = req_parser.parse_args()

        try:
            file_data = base64.b64decode(args['file'])
        except binascii.Error as err:
            abort(400, message='Failed decoding file: ' + str(err))

        # Wrap the decoded bytes so they can be stored like a form upload.
        upload = FileStorage(stream=io.BytesIO(file_data), filename=args['file_name'])
        _, stored_path = utils.save_file(upload, self.upload_folder)

        parser, mime = utils.get_file_parser(stored_path)
        if parser is None:
            abort(415, message='The type %s is not supported' % mime)

        # Metadata must be read before the file is cleaned.
        meta = parser.get_meta()
        if not parser.remove_all():
            abort(500, message='Unable to clean %s' % mime)

        key, meta_after, output_filename = utils.cleanup(parser, stored_path, self.upload_folder)
        download_link = urljoin(
            request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename)
        )
        return utils.return_file_created_response(
            output_filename, mime, key, meta, meta_after, download_link
        )
| 54 | |||
| 55 | |||
class APIDownload(Resource):
    """REST resource that serves a cleaned file exactly once."""

    def __init__(self, **kwargs):
        # Folder in which uploaded and cleaned files are stored.
        self.upload_folder = kwargs['upload_folder']

    def get(self, key: str, filename: str):
        """Send the requested file as an attachment.

        :param key: hex digest that must match the stored file.
        :param filename: name of the file inside the upload folder.
        """
        complete_path, filepath = utils.is_valid_api_download_file(
            filename, key, self.upload_folder
        )

        # Only a real GET consumes the file; other methods routed here
        # (HEAD) must leave it in place.
        consumes_file = request.method == 'GET'
        if consumes_file:
            file_removal_scheduler.run_file_removal_job(self.upload_folder)

            @after_this_request
            def remove_file(response):
                if os.path.exists(complete_path):
                    os.remove(complete_path)
                return response

        return send_from_directory(self.upload_folder, filepath, as_attachment=True)
| 74 | |||
| 75 | |||
class APIBulkDownloadCreator(Resource):
    """REST resource that bundles several cleaned files into one zip archive."""

    def __init__(self, **kwargs):
        # Folder in which uploaded and cleaned files are stored.
        self.upload_folder = kwargs['upload_folder']

    # Request body: a list of {key, file_name} entries. At least two entries
    # are required; the upper bound comes from MAT2_MAX_FILES_BULK_DOWNLOAD
    # (default 10).
    schema = {
        'download_list': {
            'type': 'list',
            'minlength': 2,
            'maxlength': int(os.environ.get('MAT2_MAX_FILES_BULK_DOWNLOAD', 10)),
            'schema': {
                'type': 'dict',
                'schema': {
                    'key': {'type': 'string', 'required': True},
                    'file_name': {'type': 'string', 'required': True}
                }
            }
        }
    }
    v = Validator(schema)

    def post(self):
        """Create a zip of the requested files and return its download data.

        Every listed file is validated, added to the archive and then
        removed from the upload folder. Aborts with 400 on an invalid
        request body or archive problems and with 500 when the archive
        cannot be cleaned. Responds with status 201 on success.
        """
        utils.check_upload_folder(self.upload_folder)

        data = request.json
        if not self.v.validate(data):
            abort(400, message=self.v.errors)

        # A random component prevents the zip file from being overwritten.
        zip_filename = 'files.' + str(uuid4()) + '.zip'
        zip_path = os.path.join(self.upload_folder, zip_filename)

        with zipfile.ZipFile(zip_path, 'w') as cleaned_files_zip:
            for entry in data['download_list']:
                complete_path, _ = utils.is_valid_api_download_file(
                    entry['file_name'], entry['key'], self.upload_folder
                )
                try:
                    cleaned_files_zip.write(complete_path)
                    os.remove(complete_path)
                except ValueError:
                    abort(400, message='Creating the archive failed')

            # NOTE(review): assumed to run while the archive is still open,
            # since testzip() raises ValueError on a closed ZipFile.
            try:
                cleaned_files_zip.testzip()
            except ValueError as e:
                abort(400, message=str(e))

        parser, mime = utils.get_file_parser(zip_path)
        if not parser.remove_all():
            abort(500, message='Unable to clean %s' % mime)

        key, meta_after, output_filename = utils.cleanup(parser, zip_path, self.upload_folder)
        download_link = urljoin(
            request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename)
        )
        response_body = {
            'output_filename': output_filename,
            'mime': mime,
            'key': key,
            'meta_after': meta_after,
            'download_link': download_link
        }
        return response_body, 201
| 135 | |||
| 136 | |||
class APISupportedExtensions(Resource):
    """REST resource that lists the supported file extensions."""

    def get(self):
        """Return the sorted list produced by ``utils.get_supported_extensions``."""
        supported = utils.get_supported_extensions()
        return supported
diff --git a/matweb/utils.py b/matweb/utils.py new file mode 100644 index 0000000..8dfff45 --- /dev/null +++ b/matweb/utils.py | |||
| @@ -0,0 +1,91 @@ | |||
| 1 | import hmac | ||
| 2 | import os | ||
| 3 | import hashlib | ||
| 4 | import mimetypes as mtype | ||
| 5 | |||
| 6 | from flask_restful import abort | ||
| 7 | from libmat2 import parser_factory | ||
| 8 | from werkzeug.utils import secure_filename | ||
| 9 | |||
| 10 | |||
def get_allow_origin_header_value():
    """Return the allowed origins as a list of strings.

    The value is read from ``MAT2_ALLOW_ORIGIN_WHITELIST`` and split on
    single spaces; without that variable the result is ``['*']``.
    """
    raw_whitelist = os.getenv('MAT2_ALLOW_ORIGIN_WHITELIST', '*')
    return raw_whitelist.split(" ")
| 13 | |||
| 14 | |||
def hash_file(filepath: str) -> str:
    """Return the SHA-256 hex digest of the file at ``filepath``.

    The file is consumed in 64 KiB chunks so that it never has to be held
    in memory as a whole.
    """
    digest = hashlib.sha256()
    with open(filepath, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
| 24 | |||
| 25 | |||
def check_upload_folder(upload_folder):
    """Make sure the upload folder exists, creating it when necessary.

    The directory is created directly instead of checking for it first, so
    two requests arriving at the same time cannot fail with
    ``FileExistsError`` when both find the folder missing.

    :param upload_folder: path of the directory to create.
    """
    try:
        os.mkdir(upload_folder)
    except FileExistsError:
        # Already present (possibly created by a concurrent request).
        pass
| 29 | |||
| 30 | |||
def return_file_created_response(output_filename, mime, key, meta, meta_after, download_link):
    """Build the response dictionary that describes a cleaned file.

    Each argument is stored unchanged under the key of the same name.
    """
    field_names = ('output_filename', 'mime', 'key', 'meta', 'meta_after', 'download_link')
    field_values = (output_filename, mime, key, meta, meta_after, download_link)
    return dict(zip(field_names, field_values))
| 40 | |||
| 41 | |||
def get_supported_extensions():
    """Return the sorted file extensions of all mimetypes known to the parsers.

    Every mimetype declared by a parser is mapped to its extensions with
    ``mimetypes.guess_all_extensions`` (non-strict); duplicates are dropped.
    """
    extensions = set()
    for parser in parser_factory._get_parsers():
        for mime in parser.mimetypes:
            extensions.update(mtype.guess_all_extensions(mime, strict=False))
    # Falsy entries are dropped before sorting.
    return sorted(ext for ext in extensions if ext)
| 49 | |||
| 50 | |||
def save_file(file, upload_folder):
    """Store an uploaded file in the upload folder under a sanitized name.

    :param file: object exposing ``filename`` and ``save(path)``.
    :param upload_folder: directory that receives the file.
    :return: tuple ``(filename, filepath)`` with the sanitized name and the
        path the file was written to.
    """
    filename = secure_filename(file.filename)
    filepath = os.path.join(upload_folder, filename)
    file.save(filepath)
    return filename, filepath
| 56 | |||
| 57 | |||
def get_file_parser(filepath: str):
    """Look up the parser for ``filepath`` via ``parser_factory.get_parser``.

    :return: tuple ``(parser, mime)``; callers in this project treat a
        ``None`` parser as "type not supported".
    """
    found_parser, detected_mime = parser_factory.get_parser(filepath)
    return found_parser, detected_mime
| 61 | |||
| 62 | |||
def cleanup(parser, filepath, upload_folder):
    """Finish a cleaning run: read the new metadata, drop the input, hash the output.

    :param parser: parser whose ``output_filename`` points at the cleaned file.
    :param filepath: path of the original upload, which is deleted here.
    :param upload_folder: directory in which the cleaned file is hashed.
    :return: tuple ``(key, meta_after, output_filename)``.
    """
    cleaned_path = parser.output_filename
    output_filename = os.path.basename(cleaned_path)

    # A fresh parser for the cleaned file reports its metadata.
    cleaned_parser, _ = parser_factory.get_parser(cleaned_path)
    meta_after = cleaned_parser.get_meta()

    os.remove(filepath)

    hashed_path = os.path.join(upload_folder, output_filename)
    return hash_file(hashed_path), meta_after, output_filename
| 71 | |||
| 72 | |||
def get_file_paths(filename, upload_folder):
    """Return the full and the relative path of a file in the upload folder.

    :return: tuple ``(complete_path, filepath)`` where ``filepath`` is the
        sanitized filename and ``complete_path`` joins it onto
        ``upload_folder``.
    """
    safe_name = secure_filename(filename)
    return os.path.join(upload_folder, safe_name), safe_name
| 78 | |||
| 79 | |||
def is_valid_api_download_file(filename, key, upload_folder):
    """Validate an API download request and return the paths of the file.

    Aborts the request with
    * 400 when ``filename`` is not already in its ``secure_filename`` form,
    * 404 when the file does not exist in ``upload_folder``,
    * 400 when ``key`` differs from the SHA-256 hex digest of the file.

    :return: tuple ``(complete_path, filepath)`` as produced by
        ``get_file_paths``.
    """
    if filename != secure_filename(filename):
        abort(400, message='Insecure filename')

    complete_path, filepath = get_file_paths(filename, upload_folder)

    if not os.path.exists(complete_path):
        abort(404, message='File not found')

    # compare_digest() raises TypeError for str arguments that contain
    # non-ASCII characters, and `key` is client supplied. Comparing bytes
    # keeps the constant-time check and reports such keys as a mismatch
    # instead of failing with a server error.
    expected_key = hash_file(complete_path).encode('utf-8')
    if not hmac.compare_digest(expected_key, key.encode('utf-8', 'replace')):
        abort(400, message='The file hash does not match')
    return complete_path, filepath
