summaryrefslogtreecommitdiff
path: root/matweb
diff options
context:
space:
mode:
author jfriedli 2020-04-23 10:39:35 -0700
committer jfriedli 2020-04-23 10:39:35 -0700
commit e1bac8b6a7fd857f38b7bcb678398c82baaa8fd5 (patch)
tree fa87e526289e455f2f17b86973d08eb6850e721f /matweb
parent d14988fa3fa97f549fb8eaf601cb2c687cdce143 (diff)
Refactoring
Diffstat (limited to 'matweb')
-rw-r--r-- matweb/file_removal_scheduler.py 26
-rw-r--r-- matweb/frontend.py 77
-rw-r--r-- matweb/rest_api.py 139
-rw-r--r-- matweb/utils.py 91
4 files changed, 333 insertions, 0 deletions
diff --git a/matweb/file_removal_scheduler.py b/matweb/file_removal_scheduler.py
new file mode 100644
index 0000000..2ce7912
--- /dev/null
+++ b/matweb/file_removal_scheduler.py
@@ -0,0 +1,26 @@
1import glob
2import time
3import sys
4import os
5import random
6
7
def run_file_removal_job(upload_folder_path):
    """Occasionally sweep the upload folder and delete files that are too old.

    The sweep is probabilistic: it only runs when a random integer drawn
    from 0..10 (inclusive) equals 0, so most calls return immediately.

    :param upload_folder_path: directory whose direct children are checked.
    """
    if random.randint(0, 10) != 0:
        return

    for candidate in glob.glob(upload_folder_path + '/*'):
        delete_file_when_too_old(candidate)
12
13
def delete_file_when_too_old(filepath):
    """Delete *filepath* when its last modification is older than the limit.

    The limit is read, in seconds, from the ``MAT2_MAX_FILE_AGE_FOR_REMOVAL``
    environment variable and defaults to 15 minutes.

    A file that no longer exists (for example because another request
    removed it after the caller listed the folder) is treated as already
    removed and silently skipped. Any other removal failure is reported on
    stdout and terminates the process with exit status 1.

    :param filepath: path of the file to check.
    """
    try:
        file_mod_time = os.stat(filepath).st_mtime
    except FileNotFoundError:
        # Vanished between listing and stat: nothing left to clean up.
        return

    # time in seconds since the last modification of the file
    age = time.time() - file_mod_time

    max_age = int(os.environ.get('MAT2_MAX_FILE_AGE_FOR_REMOVAL', 15 * 60))
    if age <= max_age:
        return

    try:
        os.remove(filepath)
    except FileNotFoundError:
        # Someone else deleted it first; the goal is reached either way.
        return
    except OSError:
        # NOTE(review): exiting the process from a request handler is harsh;
        # kept because callers may rely on it - confirm before relaxing.
        print('Automatic File Removal failed on file: ' + str(filepath))
        sys.exit(1)
diff --git a/matweb/frontend.py b/matweb/frontend.py
new file mode 100644
index 0000000..93432b4
--- /dev/null
+++ b/matweb/frontend.py
@@ -0,0 +1,77 @@
1import hmac
2import os
3
4from flask import Blueprint, render_template, url_for, current_app, after_this_request, send_from_directory, request, \
5 flash
6from werkzeug.utils import secure_filename, redirect
7
8from matweb import file_removal_scheduler, utils
9
# Blueprint on which the HTML views of this module are registered.
routes = Blueprint(name='routes', import_name=__name__)
11
12
@routes.route('/info')
def info():
    """Render ``info.html`` with the list of supported file extensions.

    The extension list is computed once per request and handed to the
    template as ``extensions``.
    """
    return render_template(
        'info.html', extensions=utils.get_supported_extensions()
    )
19
20
@routes.route('/download/<string:key>/<string:filename>')
def download_file(key: str, filename: str):
    """Serve a file from the upload folder and delete it afterwards.

    The request is redirected to the upload page when the filename is not
    already in its ``secure_filename`` form, when the file does not exist,
    or when ``key`` does not match the SHA-256 hex digest of the file.

    :param key: expected hex digest of the file, taken from the URL.
    :param filename: name of the file inside the upload folder.
    :return: the file as an attachment, or a redirect to the upload page.
    """
    if filename != secure_filename(filename):
        return redirect(url_for('routes.upload_file'))

    upload_folder = current_app.config['UPLOAD_FOLDER']
    complete_path, filepath = utils.get_file_paths(filename, upload_folder)
    file_removal_scheduler.run_file_removal_job(upload_folder)

    if not os.path.exists(complete_path):
        return redirect(url_for('routes.upload_file'))

    # Compare as bytes: compare_digest raises TypeError for str arguments
    # containing non-ASCII characters, and `key` comes straight from the URL.
    expected = utils.hash_file(complete_path).encode()
    if not hmac.compare_digest(expected, key.encode()):
        return redirect(url_for('routes.upload_file'))

    # Only a real download consumes the file; the HEAD request that Flask
    # answers through this same view must not delete it.
    if request.method == 'GET':
        @after_this_request
        def remove_file(response):
            try:
                os.remove(complete_path)
            except FileNotFoundError:
                # Already removed by a concurrent request or the scheduler.
                pass
            return response

    return send_from_directory(upload_folder, filepath, as_attachment=True)
40
41
@routes.route('/', methods=['GET', 'POST'])
def upload_file():
    """Show the upload form (GET) or clean an uploaded file (POST).

    On POST the uploaded file is saved into the upload folder, a parser is
    looked up for it, its metadata is read and then removed, and the
    download page is rendered with the metadata before and after cleaning.
    Problems are reported through ``flash`` followed by a redirect.
    """
    upload_folder = current_app.config['UPLOAD_FOLDER']
    utils.check_upload_folder(upload_folder)
    mime_types = utils.get_supported_extensions()

    if request.method != 'POST':
        # MAX_CONTENT_LENGTH is divided by 1024 twice to express it in MiB.
        max_file_size = int(current_app.config['MAX_CONTENT_LENGTH'] / 1024 / 1024)
        return render_template('index.html', max_file_size=max_file_size, mimetypes=mime_types)

    # check if the post request has the file part
    if 'file' not in request.files:
        flash('No file part')
        return redirect(request.url)

    uploaded_file = request.files['file']
    if not uploaded_file.filename:
        flash('No selected file')
        return redirect(request.url)

    _, saved_path = utils.save_file(uploaded_file, upload_folder)
    parser, mime = utils.get_file_parser(saved_path)
    if parser is None:
        flash('The type %s is not supported' % mime)
        return redirect(url_for('routes.upload_file'))

    meta_before = parser.get_meta()
    if parser.remove_all() is not True:
        flash('Unable to clean %s' % mime)
        return redirect(url_for('routes.upload_file'))

    key, meta_after, output_filename = utils.cleanup(parser, saved_path, upload_folder)
    return render_template(
        'download.html',
        mimetypes=mime_types,
        meta=meta_before,
        filename=output_filename,
        meta_after=meta_after,
        key=key,
    )
diff --git a/matweb/rest_api.py b/matweb/rest_api.py
new file mode 100644
index 0000000..60d834f
--- /dev/null
+++ b/matweb/rest_api.py
@@ -0,0 +1,139 @@
1import os
2import base64
3import io
4import binascii
5import zipfile
6from uuid import uuid4
7
8from flask import after_this_request, send_from_directory
9from flask_restful import Resource, reqparse, abort, request
10from cerberus import Validator
11from werkzeug.datastructures import FileStorage
12from urllib.parse import urljoin
13
14from matweb import file_removal_scheduler, utils
15
16
class APIUpload(Resource):
    """REST resource that receives a base64-encoded file and cleans it."""

    def __init__(self, **kwargs):
        # Directory in which uploaded and cleaned files are stored.
        self.upload_folder = kwargs['upload_folder']

    def post(self):
        """Decode, store and clean the posted file.

        Expects the POST parameters ``file_name`` and ``file`` (base64).
        Aborts with 400 when the payload cannot be decoded, 415 when no
        parser is available for the file, and 500 when cleaning fails.

        :return: the dict built by ``utils.return_file_created_response``.
        """
        utils.check_upload_folder(self.upload_folder)

        req_parser = reqparse.RequestParser()
        req_parser.add_argument('file_name', type=str, required=True, help='Post parameter is not specified: file_name')
        req_parser.add_argument('file', type=str, required=True, help='Post parameter is not specified: file')
        args = req_parser.parse_args()

        try:
            raw_bytes = base64.b64decode(args['file'])
        except binascii.Error as err:
            abort(400, message='Failed decoding file: ' + str(err))

        # Wrap the decoded bytes so they can be saved like a regular upload.
        upload = FileStorage(stream=io.BytesIO(raw_bytes), filename=args['file_name'])
        _, stored_path = utils.save_file(upload, self.upload_folder)

        parser, mime = utils.get_file_parser(stored_path)
        if parser is None:
            abort(415, message='The type %s is not supported' % mime)

        meta_before = parser.get_meta()
        if not parser.remove_all():
            abort(500, message='Unable to clean %s' % mime)

        key, meta_after, output_filename = utils.cleanup(parser, stored_path, self.upload_folder)
        download_link = urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename))
        return utils.return_file_created_response(
            output_filename,
            mime,
            key,
            meta_before,
            meta_after,
            download_link
        )
54
55
class APIDownload(Resource):
    """REST resource that serves a stored file once its key is verified."""

    def __init__(self, **kwargs):
        # Directory from which files are served.
        self.upload_folder = kwargs['upload_folder']

    def get(self, key: str, filename: str):
        """Send the requested file as an attachment.

        Validation of ``filename`` and ``key`` is delegated to
        ``utils.is_valid_api_download_file``. On GET requests the periodic
        removal job is triggered and the served file is deleted after the
        response has been produced.

        :param key: expected hash of the file.
        :param filename: name of the file inside the upload folder.
        """
        full_path, relative_name = utils.is_valid_api_download_file(filename, key, self.upload_folder)

        def discard_served_file(response):
            if os.path.exists(full_path):
                os.remove(full_path)
            return response

        # Make sure the file is NOT deleted on HEAD requests
        if request.method == 'GET':
            file_removal_scheduler.run_file_removal_job(self.upload_folder)
            after_this_request(discard_served_file)

        return send_from_directory(self.upload_folder, relative_name, as_attachment=True)
74
75
class APIBulkDownloadCreator(Resource):
    """REST resource that bundles several stored files into one zip."""

    def __init__(self, **kwargs):
        # Directory holding the files to bundle and the resulting archive.
        self.upload_folder = kwargs['upload_folder']

    # Expected request body: {"download_list": [{"key": ..., "file_name": ...}, ...]}
    # with between 2 and MAT2_MAX_FILES_BULK_DOWNLOAD (default 10) entries.
    schema = {
        'download_list': {
            'type': 'list',
            # Without `required`, a body lacking the key validates and the
            # handler fails later with a KeyError.
            'required': True,
            'minlength': 2,
            'maxlength': int(os.environ.get('MAT2_MAX_FILES_BULK_DOWNLOAD', 10)),
            'schema': {
                'type': 'dict',
                'schema': {
                    'key': {'type': 'string', 'required': True},
                    'file_name': {'type': 'string', 'required': True}
                }
            }
        }
    }
    v = Validator(schema)

    def post(self):
        """Create a zip containing the requested files and clean it.

        Every listed file is validated through
        ``utils.is_valid_api_download_file``, written to the archive and
        then removed from the upload folder. The archive is afterwards run
        through its parser and ``utils.cleanup``.

        Aborts with 400 on an invalid body or archive error, 415 when no
        parser is available for the archive, 500 when cleaning fails.

        :return: a ``(payload, 201)`` tuple describing the cleaned archive.
        """
        utils.check_upload_folder(self.upload_folder)
        data = request.json
        # Reject a missing or non-object body with a 400 before it reaches
        # the schema validator.
        if not isinstance(data, dict):
            abort(400, message='Request body must be a JSON object')
        if not self.v.validate(data):
            abort(400, message=self.v.errors)

        # prevent the zip file from being overwritten
        zip_filename = 'files.' + str(uuid4()) + '.zip'
        zip_path = os.path.join(self.upload_folder, zip_filename)
        with zipfile.ZipFile(zip_path, 'w') as cleaned_files_zip:
            for file_candidate in data['download_list']:
                complete_path, _ = utils.is_valid_api_download_file(
                    file_candidate['file_name'],
                    file_candidate['key'],
                    self.upload_folder
                )
                try:
                    cleaned_files_zip.write(complete_path)
                    os.remove(complete_path)
                except ValueError:
                    abort(400, message='Creating the archive failed')

            try:
                cleaned_files_zip.testzip()
            except ValueError as e:
                abort(400, message=str(e))

        parser, mime = utils.get_file_parser(zip_path)
        # Same guard as APIUpload.post: no parser means unsupported type.
        if parser is None:
            abort(415, message='The type %s is not supported' % mime)
        if not parser.remove_all():
            abort(500, message='Unable to clean %s' % mime)

        key, meta_after, output_filename = utils.cleanup(parser, zip_path, self.upload_folder)
        return {
            'output_filename': output_filename,
            'mime': mime,
            'key': key,
            'meta_after': meta_after,
            'download_link': urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename))
        }, 201
135
136
class APISupportedExtensions(Resource):
    """REST resource returning the result of ``utils.get_supported_extensions``."""

    def get(self):
        supported = utils.get_supported_extensions()
        return supported
diff --git a/matweb/utils.py b/matweb/utils.py
new file mode 100644
index 0000000..8dfff45
--- /dev/null
+++ b/matweb/utils.py
@@ -0,0 +1,91 @@
1import hmac
2import os
3import hashlib
4import mimetypes as mtype
5
6from flask_restful import abort
7from libmat2 import parser_factory
8from werkzeug.utils import secure_filename
9
10
def get_allow_origin_header_value():
    """Return the allowed origins configured for the service.

    The value is read from the ``MAT2_ALLOW_ORIGIN_WHITELIST`` environment
    variable, a whitespace-separated list of origins, and defaults to
    ``['*']`` when the variable is unset.

    Splitting on any run of whitespace keeps repeated or trailing spaces
    from producing empty-string entries in the result.

    :return: list of origin strings.
    """
    return os.environ.get('MAT2_ALLOW_ORIGIN_WHITELIST', '*').split()
13
14
def hash_file(filepath: str) -> str:
    """Return the SHA-256 hex digest of the file at *filepath*.

    The file is read in binary mode in 64 KiB chunks, so it is never held
    in memory as a whole.
    """
    digest = hashlib.sha256()
    with open(filepath, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file)
        for chunk in iter(lambda: stream.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
24
25
def check_upload_folder(upload_folder):
    """Create *upload_folder* if it does not exist yet.

    The directory is created directly and an already existing path is
    accepted, so two requests racing to create the folder cannot make one
    of them fail with ``FileExistsError``. Parent directories are not
    created.

    :param upload_folder: path of the directory to ensure.
    """
    try:
        os.mkdir(upload_folder)
    except FileExistsError:
        # Already there (possibly created by a concurrent request).
        pass
29
30
def return_file_created_response(output_filename, mime, key, meta, meta_after, download_link):
    """Bundle the given values into the response dict for a cleaned file.

    Each argument is stored unchanged under the key of the same name.
    """
    return dict(
        output_filename=output_filename,
        mime=mime,
        key=key,
        meta=meta,
        meta_after=meta_after,
        download_link=download_link,
    )
40
41
def get_supported_extensions():
    """Return the sorted file extensions of every registered parser.

    For each mimetype declared by each parser, all extensions known to the
    ``mimetypes`` module (non-strict lookup) are collected without
    duplicates.
    """
    known = set()
    for parser in parser_factory._get_parsers():
        for mimetype in parser.mimetypes:
            known.update(mtype.guess_all_extensions(mimetype, strict=False))
    # drop any falsy entries before sorting
    return sorted(filter(None, known))
49
50
def save_file(file, upload_folder):
    """Store an uploaded file in *upload_folder* under a sanitised name.

    :param file: object exposing ``filename`` and ``save(path)``.
    :param upload_folder: directory to save into.
    :return: ``(filename, filepath)`` - the sanitised name and its full path.
    """
    safe_name = secure_filename(file.filename)
    destination = os.path.join(upload_folder, safe_name)
    file.save(destination)
    return safe_name, destination
56
57
def get_file_parser(filepath: str):
    """Look up the parser for *filepath* via ``parser_factory.get_parser``.

    :return: ``(parser, mime)`` as produced by the factory; callers in this
        project treat a ``None`` parser as an unsupported file type.
    """
    found_parser, detected_mime = parser_factory.get_parser(filepath)
    return found_parser, detected_mime
61
62
def cleanup(parser, filepath, upload_folder):
    """Finish a cleaning run: re-read metadata, drop the input, hash the output.

    :param parser: parser that produced the cleaned file; its
        ``output_filename`` attribute points at that file.
    :param filepath: path of the original (uncleaned) file, which is deleted.
    :param upload_folder: directory in which the cleaned file is hashed.
    :return: ``(key, meta_after, output_filename)`` - hash of the cleaned
        file, metadata read back from it, and its base name.
    """
    cleaned_path = parser.output_filename
    output_filename = os.path.basename(cleaned_path)

    # A fresh parser on the cleaned file shows which metadata remains.
    cleaned_parser, _ = parser_factory.get_parser(cleaned_path)
    meta_after = cleaned_parser.get_meta()

    os.remove(filepath)

    hashed_path = os.path.join(upload_folder, output_filename)
    key = hash_file(hashed_path)
    return key, meta_after, output_filename
71
72
def get_file_paths(filename, upload_folder):
    """Return the full and relative path of *filename* in *upload_folder*.

    The name is passed through ``secure_filename`` before being joined.

    :return: ``(complete_path, filepath)`` - the joined path and the
        sanitised file name.
    """
    sanitised = secure_filename(filename)
    return os.path.join(upload_folder, sanitised), sanitised
78
79
def is_valid_api_download_file(filename, key, upload_folder):
    """Validate an API download request and return the file's paths.

    Aborts the request with
      * 400 when *filename* differs from its ``secure_filename`` form,
      * 404 when the file does not exist in *upload_folder*,
      * 400 when *key* does not match the file's SHA-256 hex digest.

    :param filename: requested file name.
    :param key: hash supplied by the client.
    :param upload_folder: directory the file must live in.
    :return: ``(complete_path, filepath)`` as given by ``get_file_paths``.
    """
    if filename != secure_filename(filename):
        abort(400, message='Insecure filename')

    complete_path, filepath = get_file_paths(filename, upload_folder)

    if not os.path.exists(complete_path):
        abort(404, message='File not found')

    # Compare as bytes: compare_digest raises TypeError for str arguments
    # containing non-ASCII characters, and `key` is client-supplied.
    expected = hash_file(complete_path).encode()
    if not hmac.compare_digest(expected, key.encode()):
        abort(400, message='The file hash does not match')
    return complete_path, filepath