summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--main.py260
-rw-r--r--matweb/file_removal_scheduler.py (renamed from file_removal_scheduler.py)0
-rw-r--r--matweb/frontend.py77
-rw-r--r--matweb/rest_api.py139
-rw-r--r--matweb/utils.py91
-rw-r--r--templates/download.html2
-rw-r--r--test/test.py11
-rw-r--r--test/test_api.py55
-rw-r--r--test/test_file_removal_scheduler.py18
-rw-r--r--utils.py33
10 files changed, 376 insertions, 310 deletions
diff --git a/main.py b/main.py
index 18811b9..c21c1a1 100644
--- a/main.py
+++ b/main.py
@@ -1,23 +1,10 @@
1import os 1import os
2import hmac
3import mimetypes as mtype
4from uuid import uuid4
5import jinja2 2import jinja2
6import base64
7import io
8import binascii
9import zipfile
10 3
11from cerberus import Validator 4from matweb import utils, rest_api, frontend
12import utils 5from flask import Flask
13import file_removal_scheduler 6from flask_restful import Api
14from libmat2 import parser_factory
15from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request
16from flask_restful import Resource, Api, reqparse, abort
17from werkzeug.utils import secure_filename
18from werkzeug.datastructures import FileStorage
19from flask_cors import CORS 7from flask_cors import CORS
20from urllib.parse import urljoin
21 8
22 9
23def create_app(test_config=None): 10def create_app(test_config=None):
@@ -32,235 +19,32 @@ def create_app(test_config=None):
32 if test_config is not None: 19 if test_config is not None:
33 app.config.update(test_config) 20 app.config.update(test_config)
34 21
22 # Non JS Frontend
35 app.jinja_loader = jinja2.ChoiceLoader([ # type: ignore 23 app.jinja_loader = jinja2.ChoiceLoader([ # type: ignore
36 jinja2.FileSystemLoader(app.config['CUSTOM_TEMPLATES_DIR']), 24 jinja2.FileSystemLoader(app.config['CUSTOM_TEMPLATES_DIR']),
37 app.jinja_loader, 25 app.jinja_loader,
38 ]) 26 ])
27 app.register_blueprint(frontend.routes)
39 28
29 # Restful API hookup
40 api = Api(app) 30 api = Api(app)
41 CORS(app, resources={r"/api/*": {"origins": utils.get_allow_origin_header_value()}}) 31 CORS(app, resources={r"/api/*": {"origins": utils.get_allow_origin_header_value()}})
42 32 api.add_resource(
43 @app.route('/info') 33 rest_api.APIUpload,
44 def info(): 34 '/api/upload',
45 get_supported_extensions() 35 resource_class_kwargs={'upload_folder': app.config['UPLOAD_FOLDER']}
46 return render_template( 36 )
47 'info.html', extensions=get_supported_extensions() 37 api.add_resource(
48 ) 38 rest_api.APIDownload,
49 39 '/api/download/<string:key>/<string:filename>',
50 @app.route('/download/<string:key>/<string:filename>') 40 resource_class_kwargs={'upload_folder': app.config['UPLOAD_FOLDER']}
51 def download_file(key: str, filename: str): 41 )
52 if filename != secure_filename(filename): 42 api.add_resource(
53 return redirect(url_for('upload_file')) 43 rest_api.APIBulkDownloadCreator,
54 44 '/api/download/bulk',
55 complete_path, filepath = get_file_paths(filename) 45 resource_class_kwargs={'upload_folder': app.config['UPLOAD_FOLDER']}
56 file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER']) 46 )
57 47 api.add_resource(rest_api.APISupportedExtensions, '/api/extension')
58 if not os.path.exists(complete_path):
59 return redirect(url_for('upload_file'))
60 if hmac.compare_digest(utils.hash_file(complete_path), key) is False:
61 return redirect(url_for('upload_file'))
62 @after_this_request
63 def remove_file(response):
64 if os.path.exists(complete_path):
65 os.remove(complete_path)
66 return response
67 return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True)
68
69 @app.route('/', methods=['GET', 'POST'])
70 def upload_file():
71 utils.check_upload_folder(app.config['UPLOAD_FOLDER'])
72 mimetypes = get_supported_extensions()
73
74 if request.method == 'POST':
75 if 'file' not in request.files: # check if the post request has the file part
76 flash('No file part')
77 return redirect(request.url)
78
79 uploaded_file = request.files['file']
80 if not uploaded_file.filename:
81 flash('No selected file')
82 return redirect(request.url)
83
84 filename, filepath = save_file(uploaded_file)
85 parser, mime = get_file_parser(filepath)
86
87 if parser is None:
88 flash('The type %s is not supported' % mime)
89 return redirect(url_for('upload_file'))
90
91 meta = parser.get_meta()
92
93 if parser.remove_all() is not True:
94 flash('Unable to clean %s' % mime)
95 return redirect(url_for('upload_file'))
96
97 key, meta_after, output_filename = cleanup(parser, filepath)
98
99 return render_template(
100 'download.html', mimetypes=mimetypes, meta=meta, filename=output_filename, meta_after=meta_after, key=key
101 )
102
103 max_file_size = int(app.config['MAX_CONTENT_LENGTH'] / 1024 / 1024)
104 return render_template('index.html', max_file_size=max_file_size, mimetypes=mimetypes)
105
106 def get_supported_extensions():
107 extensions = set()
108 for parser in parser_factory._get_parsers():
109 for m in parser.mimetypes:
110 extensions |= set(mtype.guess_all_extensions(m, strict=False))
111 # since `guess_extension` might return `None`, we need to filter it out
112 return sorted(filter(None, extensions))
113
114 def save_file(file):
115 filename = secure_filename(file.filename)
116 filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
117 file.save(os.path.join(filepath))
118 return filename, filepath
119
120 def get_file_parser(filepath: str):
121 parser, mime = parser_factory.get_parser(filepath)
122 return parser, mime
123
124 def cleanup(parser, filepath):
125 output_filename = os.path.basename(parser.output_filename)
126 parser, _ = parser_factory.get_parser(parser.output_filename)
127 meta_after = parser.get_meta()
128 os.remove(filepath)
129
130 key = utils.hash_file(os.path.join(app.config['UPLOAD_FOLDER'], output_filename))
131 return key, meta_after, output_filename
132
133 def get_file_paths(filename):
134 filepath = secure_filename(filename)
135
136 complete_path = os.path.join(app.config['UPLOAD_FOLDER'], filepath)
137 return complete_path, filepath
138
139 def is_valid_api_download_file(filename, key):
140 if filename != secure_filename(filename):
141 abort(400, message='Insecure filename')
142
143 complete_path, filepath = get_file_paths(filename)
144
145 if not os.path.exists(complete_path):
146 abort(404, message='File not found')
147
148 if hmac.compare_digest(utils.hash_file(complete_path), key) is False:
149 abort(400, message='The file hash does not match')
150 return complete_path, filepath
151
152 class APIUpload(Resource):
153
154 def post(self):
155 utils.check_upload_folder(app.config['UPLOAD_FOLDER'])
156 req_parser = reqparse.RequestParser()
157 req_parser.add_argument('file_name', type=str, required=True, help='Post parameter is not specified: file_name')
158 req_parser.add_argument('file', type=str, required=True, help='Post parameter is not specified: file')
159
160 args = req_parser.parse_args()
161 try:
162 file_data = base64.b64decode(args['file'])
163 except binascii.Error as err:
164 abort(400, message='Failed decoding file: ' + str(err))
165
166 file = FileStorage(stream=io.BytesIO(file_data), filename=args['file_name'])
167 filename, filepath = save_file(file)
168 parser, mime = get_file_parser(filepath)
169
170 if parser is None:
171 abort(415, message='The type %s is not supported' % mime)
172
173 meta = parser.get_meta()
174 if not parser.remove_all():
175 abort(500, message='Unable to clean %s' % mime)
176
177 key, meta_after, output_filename = cleanup(parser, filepath)
178 return utils.return_file_created_response(
179 output_filename,
180 mime,
181 key,
182 meta,
183 meta_after,
184 urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename))
185 )
186
187 class APIDownload(Resource):
188 def get(self, key: str, filename: str):
189 complete_path, filepath = is_valid_api_download_file(filename, key)
190 # Make sure the file is NOT deleted on HEAD requests
191 if request.method == 'GET':
192 file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER'])
193 @after_this_request
194 def remove_file(response):
195 if os.path.exists(complete_path):
196 os.remove(complete_path)
197 return response
198
199 return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True)
200
201 class APIBulkDownloadCreator(Resource):
202 schema = {
203 'download_list': {
204 'type': 'list',
205 'minlength': 2,
206 'maxlength': int(os.environ.get('MAT2_MAX_FILES_BULK_DOWNLOAD', 10)),
207 'schema': {
208 'type': 'dict',
209 'schema': {
210 'key': {'type': 'string', 'required': True},
211 'file_name': {'type': 'string', 'required': True}
212 }
213 }
214 }
215 }
216 v = Validator(schema)
217
218 def post(self):
219 utils.check_upload_folder(app.config['UPLOAD_FOLDER'])
220 data = request.json
221 if not self.v.validate(data):
222 abort(400, message=self.v.errors)
223 # prevent the zip file from being overwritten
224 zip_filename = 'files.' + str(uuid4()) + '.zip'
225 zip_path = os.path.join(app.config['UPLOAD_FOLDER'], zip_filename)
226 cleaned_files_zip = zipfile.ZipFile(zip_path, 'w')
227 with cleaned_files_zip:
228 for file_candidate in data['download_list']:
229 complete_path, file_path = is_valid_api_download_file(
230 file_candidate['file_name'],
231 file_candidate['key']
232 )
233 try:
234 cleaned_files_zip.write(complete_path)
235 os.remove(complete_path)
236 except ValueError:
237 abort(400, message='Creating the archive failed')
238
239 try:
240 cleaned_files_zip.testzip()
241 except ValueError as e:
242 abort(400, message=str(e))
243
244 parser, mime = get_file_parser(zip_path)
245 if not parser.remove_all():
246 abort(500, message='Unable to clean %s' % mime)
247 key, meta_after, output_filename = cleanup(parser, zip_path)
248 return {
249 'output_filename': output_filename,
250 'mime': mime,
251 'key': key,
252 'meta_after': meta_after,
253 'download_link': urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename))
254 }, 201
255
256 class APISupportedExtensions(Resource):
257 def get(self):
258 return get_supported_extensions()
259
260 api.add_resource(APIUpload, '/api/upload')
261 api.add_resource(APIDownload, '/api/download/<string:key>/<string:filename>')
262 api.add_resource(APIBulkDownloadCreator, '/api/download/bulk')
263 api.add_resource(APISupportedExtensions, '/api/extension')
264 48
265 return app 49 return app
266 50
diff --git a/file_removal_scheduler.py b/matweb/file_removal_scheduler.py
index 2ce7912..2ce7912 100644
--- a/file_removal_scheduler.py
+++ b/matweb/file_removal_scheduler.py
diff --git a/matweb/frontend.py b/matweb/frontend.py
new file mode 100644
index 0000000..93432b4
--- /dev/null
+++ b/matweb/frontend.py
@@ -0,0 +1,77 @@
1import hmac
2import os
3
4from flask import Blueprint, render_template, url_for, current_app, after_this_request, send_from_directory, request, \
5 flash
6from werkzeug.utils import secure_filename, redirect
7
8from matweb import file_removal_scheduler, utils
9
10routes = Blueprint('routes', __name__)
11
12
13@routes.route('/info')
14def info():
15 utils.get_supported_extensions()
16 return render_template(
17 'info.html', extensions=utils.get_supported_extensions()
18 )
19
20
21@routes.route('/download/<string:key>/<string:filename>')
22def download_file(key: str, filename: str):
23 if filename != secure_filename(filename):
24 return redirect(url_for('routes.upload_file'))
25
26 complete_path, filepath = utils.get_file_paths(filename, current_app.config['UPLOAD_FOLDER'])
27 file_removal_scheduler.run_file_removal_job(current_app.config['UPLOAD_FOLDER'])
28
29 if not os.path.exists(complete_path):
30 return redirect(url_for('routes.upload_file'))
31 if hmac.compare_digest(utils.hash_file(complete_path), key) is False:
32 return redirect(url_for('routes.upload_file'))
33
34 @after_this_request
35 def remove_file(response):
36 if os.path.exists(complete_path):
37 os.remove(complete_path)
38 return response
39 return send_from_directory(current_app.config['UPLOAD_FOLDER'], filepath, as_attachment=True)
40
41
42@routes.route('/', methods=['GET', 'POST'])
43def upload_file():
44 utils.check_upload_folder(current_app.config['UPLOAD_FOLDER'])
45 mime_types = utils.get_supported_extensions()
46
47 if request.method == 'POST':
48 if 'file' not in request.files: # check if the post request has the file part
49 flash('No file part')
50 return redirect(request.url)
51
52 uploaded_file = request.files['file']
53 if not uploaded_file.filename:
54 flash('No selected file')
55 return redirect(request.url)
56
57 filename, filepath = utils.save_file(uploaded_file, current_app.config['UPLOAD_FOLDER'])
58 parser, mime = utils.get_file_parser(filepath)
59
60 if parser is None:
61 flash('The type %s is not supported' % mime)
62 return redirect(url_for('routes.upload_file'))
63
64 meta = parser.get_meta()
65
66 if parser.remove_all() is not True:
67 flash('Unable to clean %s' % mime)
68 return redirect(url_for('routes.upload_file'))
69
70 key, meta_after, output_filename = utils.cleanup(parser, filepath, current_app.config['UPLOAD_FOLDER'])
71
72 return render_template(
73 'download.html', mimetypes=mime_types, meta=meta, filename=output_filename, meta_after=meta_after, key=key
74 )
75
76 max_file_size = int(current_app.config['MAX_CONTENT_LENGTH'] / 1024 / 1024)
77 return render_template('index.html', max_file_size=max_file_size, mimetypes=mime_types) \ No newline at end of file
diff --git a/matweb/rest_api.py b/matweb/rest_api.py
new file mode 100644
index 0000000..60d834f
--- /dev/null
+++ b/matweb/rest_api.py
@@ -0,0 +1,139 @@
1import os
2import base64
3import io
4import binascii
5import zipfile
6from uuid import uuid4
7
8from flask import after_this_request, send_from_directory
9from flask_restful import Resource, reqparse, abort, request
10from cerberus import Validator
11from werkzeug.datastructures import FileStorage
12from urllib.parse import urljoin
13
14from matweb import file_removal_scheduler, utils
15
16
17class APIUpload(Resource):
18
19 def __init__(self, **kwargs):
20 self.upload_folder = kwargs['upload_folder']
21
22 def post(self):
23 utils.check_upload_folder(self.upload_folder)
24 req_parser = reqparse.RequestParser()
25 req_parser.add_argument('file_name', type=str, required=True, help='Post parameter is not specified: file_name')
26 req_parser.add_argument('file', type=str, required=True, help='Post parameter is not specified: file')
27
28 args = req_parser.parse_args()
29 try:
30 file_data = base64.b64decode(args['file'])
31 except binascii.Error as err:
32 abort(400, message='Failed decoding file: ' + str(err))
33
34 file = FileStorage(stream=io.BytesIO(file_data), filename=args['file_name'])
35 filename, filepath = utils.save_file(file, self.upload_folder)
36 parser, mime = utils.get_file_parser(filepath)
37
38 if parser is None:
39 abort(415, message='The type %s is not supported' % mime)
40
41 meta = parser.get_meta()
42 if not parser.remove_all():
43 abort(500, message='Unable to clean %s' % mime)
44
45 key, meta_after, output_filename = utils.cleanup(parser, filepath, self.upload_folder)
46 return utils.return_file_created_response(
47 output_filename,
48 mime,
49 key,
50 meta,
51 meta_after,
52 urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename))
53 )
54
55
56class APIDownload(Resource):
57
58 def __init__(self, **kwargs):
59 self.upload_folder = kwargs['upload_folder']
60
61 def get(self, key: str, filename: str):
62 complete_path, filepath = utils.is_valid_api_download_file(filename, key, self.upload_folder)
63 # Make sure the file is NOT deleted on HEAD requests
64 if request.method == 'GET':
65 file_removal_scheduler.run_file_removal_job(self.upload_folder)
66
67 @after_this_request
68 def remove_file(response):
69 if os.path.exists(complete_path):
70 os.remove(complete_path)
71 return response
72
73 return send_from_directory(self.upload_folder, filepath, as_attachment=True)
74
75
76class APIBulkDownloadCreator(Resource):
77
78 def __init__(self, **kwargs):
79 self.upload_folder = kwargs['upload_folder']
80
81 schema = {
82 'download_list': {
83 'type': 'list',
84 'minlength': 2,
85 'maxlength': int(os.environ.get('MAT2_MAX_FILES_BULK_DOWNLOAD', 10)),
86 'schema': {
87 'type': 'dict',
88 'schema': {
89 'key': {'type': 'string', 'required': True},
90 'file_name': {'type': 'string', 'required': True}
91 }
92 }
93 }
94 }
95 v = Validator(schema)
96
97 def post(self):
98 utils.check_upload_folder(self.upload_folder)
99 data = request.json
100 if not self.v.validate(data):
101 abort(400, message=self.v.errors)
102 # prevent the zip file from being overwritten
103 zip_filename = 'files.' + str(uuid4()) + '.zip'
104 zip_path = os.path.join(self.upload_folder, zip_filename)
105 cleaned_files_zip = zipfile.ZipFile(zip_path, 'w')
106 with cleaned_files_zip:
107 for file_candidate in data['download_list']:
108 complete_path, file_path = utils.is_valid_api_download_file(
109 file_candidate['file_name'],
110 file_candidate['key'],
111 self.upload_folder
112 )
113 try:
114 cleaned_files_zip.write(complete_path)
115 os.remove(complete_path)
116 except ValueError:
117 abort(400, message='Creating the archive failed')
118
119 try:
120 cleaned_files_zip.testzip()
121 except ValueError as e:
122 abort(400, message=str(e))
123
124 parser, mime = utils.get_file_parser(zip_path)
125 if not parser.remove_all():
126 abort(500, message='Unable to clean %s' % mime)
127 key, meta_after, output_filename = utils.cleanup(parser, zip_path, self.upload_folder)
128 return {
129 'output_filename': output_filename,
130 'mime': mime,
131 'key': key,
132 'meta_after': meta_after,
133 'download_link': urljoin(request.host_url, '%s/%s/%s/%s' % ('api', 'download', key, output_filename))
134 }, 201
135
136
137class APISupportedExtensions(Resource):
138 def get(self):
139 return utils.get_supported_extensions()
diff --git a/matweb/utils.py b/matweb/utils.py
new file mode 100644
index 0000000..8dfff45
--- /dev/null
+++ b/matweb/utils.py
@@ -0,0 +1,91 @@
1import hmac
2import os
3import hashlib
4import mimetypes as mtype
5
6from flask_restful import abort
7from libmat2 import parser_factory
8from werkzeug.utils import secure_filename
9
10
11def get_allow_origin_header_value():
12 return os.environ.get('MAT2_ALLOW_ORIGIN_WHITELIST', '*').split(" ")
13
14
15def hash_file(filepath: str) -> str:
16 sha256 = hashlib.sha256()
17 with open(filepath, 'rb') as f:
18 while True:
19 data = f.read(65536) # read the file by chunk of 64k
20 if not data:
21 break
22 sha256.update(data)
23 return sha256.hexdigest()
24
25
26def check_upload_folder(upload_folder):
27 if not os.path.exists(upload_folder):
28 os.mkdir(upload_folder)
29
30
31def return_file_created_response(output_filename, mime, key, meta, meta_after, download_link):
32 return {
33 'output_filename': output_filename,
34 'mime': mime,
35 'key': key,
36 'meta': meta,
37 'meta_after': meta_after,
38 'download_link': download_link
39 }
40
41
42def get_supported_extensions():
43 extensions = set()
44 for parser in parser_factory._get_parsers():
45 for m in parser.mimetypes:
46 extensions |= set(mtype.guess_all_extensions(m, strict=False))
47 # since `guess_extension` might return `None`, we need to filter it out
48 return sorted(filter(None, extensions))
49
50
51def save_file(file, upload_folder):
52 filename = secure_filename(file.filename)
53 filepath = os.path.join(upload_folder, filename)
54 file.save(os.path.join(filepath))
55 return filename, filepath
56
57
58def get_file_parser(filepath: str):
59 parser, mime = parser_factory.get_parser(filepath)
60 return parser, mime
61
62
63def cleanup(parser, filepath, upload_folder):
64 output_filename = os.path.basename(parser.output_filename)
65 parser, _ = parser_factory.get_parser(parser.output_filename)
66 meta_after = parser.get_meta()
67 os.remove(filepath)
68
69 key = hash_file(os.path.join(upload_folder, output_filename))
70 return key, meta_after, output_filename
71
72
73def get_file_paths(filename, upload_folder):
74 filepath = secure_filename(filename)
75
76 complete_path = os.path.join(upload_folder, filepath)
77 return complete_path, filepath
78
79
80def is_valid_api_download_file(filename, key, upload_folder):
81 if filename != secure_filename(filename):
82 abort(400, message='Insecure filename')
83
84 complete_path, filepath = get_file_paths(filename, upload_folder)
85
86 if not os.path.exists(complete_path):
87 abort(404, message='File not found')
88
89 if hmac.compare_digest(hash_file(complete_path), key) is False:
90 abort(400, message='The file hash does not match')
91 return complete_path, filepath
diff --git a/templates/download.html b/templates/download.html
index 7ece063..736c9f5 100644
--- a/templates/download.html
+++ b/templates/download.html
@@ -10,7 +10,7 @@
10 {% endif %} 10 {% endif %}
11 <div class="uk-flex uk-flex-center"> 11 <div class="uk-flex uk-flex-center">
12 <div> 12 <div>
13 <a class="uk-flex-1" href='{{ url_for('download_file', key=key, filename=filename) }}'> 13 <a class="uk-flex-1" href='{{ url_for('routes.download_file', key=key, filename=filename) }}'>
14 <button class="uk-button uk-button-primary"> 14 <button class="uk-button uk-button-primary">
15 ⇩ download cleaned file 15 ⇩ download cleaned file
16 </button> 16 </button>
diff --git a/test/test.py b/test/test.py
index f871bb9..02216ac 100644
--- a/test/test.py
+++ b/test/test.py
@@ -13,11 +13,12 @@ import main
13class Mat2WebTestCase(unittest.TestCase): 13class Mat2WebTestCase(unittest.TestCase):
14 def setUp(self): 14 def setUp(self):
15 os.environ.setdefault('MAT2_ALLOW_ORIGIN_WHITELIST', 'origin1.gnu origin2.gnu') 15 os.environ.setdefault('MAT2_ALLOW_ORIGIN_WHITELIST', 'origin1.gnu origin2.gnu')
16 app = main.create_app()
17 self.upload_folder = tempfile.mkdtemp() 16 self.upload_folder = tempfile.mkdtemp()
18 app.config.update( 17 app = main.create_app(
19 TESTING=True, 18 test_config={
20 UPLOAD_FOLDER=self.upload_folder 19 'TESTING': True,
20 'UPLOAD_FOLDER': self.upload_folder
21 }
21 ) 22 )
22 self.app = app.test_client() 23 self.app = app.test_client()
23 24
@@ -127,7 +128,7 @@ class Mat2WebTestCase(unittest.TestCase):
127 rv = self.app.get('/download/70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt') 128 rv = self.app.get('/download/70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt')
128 self.assertEqual(rv.status_code, 302) 129 self.assertEqual(rv.status_code, 302)
129 130
130 @patch('file_removal_scheduler.random.randint') 131 @patch('matweb.file_removal_scheduler.random.randint')
131 def test_upload_leftover(self, randint_mock): 132 def test_upload_leftover(self, randint_mock):
132 randint_mock.return_value = 0 133 randint_mock.return_value = 0
133 os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0' 134 os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'
diff --git a/test/test_api.py b/test/test_api.py
index 3074bd5..36aae9d 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -14,12 +14,14 @@ import main
14class Mat2APITestCase(unittest.TestCase): 14class Mat2APITestCase(unittest.TestCase):
15 def setUp(self): 15 def setUp(self):
16 os.environ.setdefault('MAT2_ALLOW_ORIGIN_WHITELIST', 'origin1.gnu origin2.gnu') 16 os.environ.setdefault('MAT2_ALLOW_ORIGIN_WHITELIST', 'origin1.gnu origin2.gnu')
17 app = main.create_app()
18 self.upload_folder = tempfile.mkdtemp() 17 self.upload_folder = tempfile.mkdtemp()
19 app.config.update( 18 app = main.create_app(
20 TESTING=True, 19 test_config={
21 UPLOAD_FOLDER=self.upload_folder 20 'TESTING': True,
21 'UPLOAD_FOLDER': self.upload_folder
22 }
22 ) 23 )
24
23 self.app = app.test_client() 25 self.app = app.test_client()
24 26
25 def tearDown(self): 27 def tearDown(self):
@@ -38,7 +40,7 @@ class Mat2APITestCase(unittest.TestCase):
38 self.assertEqual(request.headers['Access-Control-Allow-Origin'], 'origin1.gnu') 40 self.assertEqual(request.headers['Access-Control-Allow-Origin'], 'origin1.gnu')
39 self.assertEqual(request.status_code, 200) 41 self.assertEqual(request.status_code, 200)
40 42
41 data = json.loads(request.data.decode('utf-8')) 43 data = request.get_json()
42 expected = { 44 expected = {
43 'output_filename': 'test_name.cleaned.jpg', 45 'output_filename': 'test_name.cleaned.jpg',
44 'mime': 'image/jpeg', 46 'mime': 'image/jpeg',
@@ -64,7 +66,7 @@ class Mat2APITestCase(unittest.TestCase):
64 self.assertEqual(request.headers['Content-Type'], 'application/json') 66 self.assertEqual(request.headers['Content-Type'], 'application/json')
65 67
66 self.assertEqual(request.status_code, 400) 68 self.assertEqual(request.status_code, 400)
67 error = json.loads(request.data.decode('utf-8'))['message'] 69 error = request.get_json()['message']
68 self.assertEqual(error['file'], 'Post parameter is not specified: file') 70 self.assertEqual(error['file'], 'Post parameter is not specified: file')
69 71
70 request = self.app.post('/api/upload', 72 request = self.app.post('/api/upload',
@@ -74,7 +76,7 @@ class Mat2APITestCase(unittest.TestCase):
74 self.assertEqual(request.headers['Content-Type'], 'application/json') 76 self.assertEqual(request.headers['Content-Type'], 'application/json')
75 77
76 self.assertEqual(request.status_code, 400) 78 self.assertEqual(request.status_code, 400)
77 error = json.loads(request.data.decode('utf-8'))['message'] 79 error = request.get_json()['message']
78 self.assertEqual(error, 'Failed decoding file: Incorrect padding') 80 self.assertEqual(error, 'Failed decoding file: Incorrect padding')
79 81
80 def test_api_not_supported(self): 82 def test_api_not_supported(self):
@@ -87,7 +89,7 @@ class Mat2APITestCase(unittest.TestCase):
87 self.assertEqual(request.headers['Content-Type'], 'application/json') 89 self.assertEqual(request.headers['Content-Type'], 'application/json')
88 self.assertEqual(request.status_code, 415) 90 self.assertEqual(request.status_code, 415)
89 91
90 error = json.loads(request.data.decode('utf-8'))['message'] 92 error = request.get_json()['message']
91 self.assertEqual(error, 'The type application/pdf is not supported') 93 self.assertEqual(error, 'The type application/pdf is not supported')
92 94
93 def test_api_supported_extensions(self): 95 def test_api_supported_extensions(self):
@@ -136,7 +138,7 @@ class Mat2APITestCase(unittest.TestCase):
136 'iaj111eAsAAQTpAwAABOkDAABQSwUGAAAAAAIAAgC8AAAAwAAAAAAA"}', 138 'iaj111eAsAAQTpAwAABOkDAABQSwUGAAAAAAIAAgC8AAAAwAAAAAAA"}',
137 headers={'content-type': 'application/json'} 139 headers={'content-type': 'application/json'}
138 ) 140 )
139 error = json.loads(request.data.decode('utf-8'))['message'] 141 error = request.get_json()['message']
140 self.assertEqual(error, 'Unable to clean application/zip') 142 self.assertEqual(error, 'Unable to clean application/zip')
141 143
142 144
@@ -148,25 +150,25 @@ class Mat2APITestCase(unittest.TestCase):
148 headers={'content-type': 'application/json'} 150 headers={'content-type': 'application/json'}
149 ) 151 )
150 self.assertEqual(request.status_code, 200) 152 self.assertEqual(request.status_code, 200)
151 data = json.loads(request.data.decode('utf-8')) 153 data = request.get_json()
152 154
153 request = self.app.get('http://localhost/api/download/' 155 request = self.app.get('http://localhost/api/download/'
154 '81a541f9ebc0233d419d25ed39908b16f82be26a783f32d56c381559e84e6161/test name.cleaned.jpg') 156 '81a541f9ebc0233d419d25ed39908b16f82be26a783f32d56c381559e84e6161/test name.cleaned.jpg')
155 self.assertEqual(request.status_code, 400) 157 self.assertEqual(request.status_code, 400)
156 error = json.loads(request.data.decode('utf-8'))['message'] 158 error = request.get_json()['message']
157 self.assertEqual(error, 'Insecure filename') 159 self.assertEqual(error, 'Insecure filename')
158 160
159 request = self.app.get('http://localhost/api/download/' 161 request = self.app.get('http://localhost/api/download/'
160 '81a541f9ebc0233d419d25ed39908b16f82be26a783f32d56c381559e84e6161/' 162 '81a541f9ebc0233d419d25ed39908b16f82be26a783f32d56c381559e84e6161/'
161 'wrong_file_name.jpg') 163 'wrong_file_name.jpg')
162 self.assertEqual(request.status_code, 404) 164 self.assertEqual(request.status_code, 404)
163 error = json.loads(request.data.decode('utf-8'))['message'] 165 error = request.get_json()['message']
164 self.assertEqual(error, 'File not found') 166 self.assertEqual(error, 'File not found')
165 167
166 request = self.app.get('http://localhost/api/download/81a541f9e/test_name.cleaned.jpg') 168 request = self.app.get('http://localhost/api/download/81a541f9e/test_name.cleaned.jpg')
167 self.assertEqual(request.status_code, 400) 169 self.assertEqual(request.status_code, 400)
168 170
169 error = json.loads(request.data.decode('utf-8'))['message'] 171 error = request.get_json()['message']
170 self.assertEqual(error, 'The file hash does not match') 172 self.assertEqual(error, 'The file hash does not match')
171 173
172 request = self.app.head(data['download_link']) 174 request = self.app.head(data['download_link'])
@@ -188,7 +190,7 @@ class Mat2APITestCase(unittest.TestCase):
188 headers={'content-type': 'application/json'} 190 headers={'content-type': 'application/json'}
189 ) 191 )
190 self.assertEqual(request.status_code, 200) 192 self.assertEqual(request.status_code, 200)
191 upload_one = json.loads(request.data.decode('utf-8')) 193 upload_one = request.get_json()
192 194
193 request = self.app.post('/api/upload', 195 request = self.app.post('/api/upload',
194 data='{"file_name": "test_name_two.jpg", ' 196 data='{"file_name": "test_name_two.jpg", '
@@ -197,7 +199,7 @@ class Mat2APITestCase(unittest.TestCase):
197 headers={'content-type': 'application/json'} 199 headers={'content-type': 'application/json'}
198 ) 200 )
199 self.assertEqual(request.status_code, 200) 201 self.assertEqual(request.status_code, 200)
200 upload_two = json.loads(request.data.decode('utf-8')) 202 upload_two = request.get_json()
201 203
202 post_body = { 204 post_body = {
203 u'download_list': [ 205 u'download_list': [
@@ -216,7 +218,7 @@ class Mat2APITestCase(unittest.TestCase):
216 headers={'content-type': 'application/json'} 218 headers={'content-type': 'application/json'}
217 ) 219 )
218 220
219 response = json.loads(request.data.decode('utf-8')) 221 response = request.get_json()
220 self.assertEqual(request.status_code, 201) 222 self.assertEqual(request.status_code, 201)
221 223
222 self.assertIn( 224 self.assertIn(
@@ -268,7 +270,7 @@ class Mat2APITestCase(unittest.TestCase):
268 headers={'content-type': 'application/json'} 270 headers={'content-type': 'application/json'}
269 ) 271 )
270 272
271 response = json.loads(request.data.decode('utf-8')) 273 response = request.get_json()
272 self.assertEqual(response['message']['download_list'][0], 'min length is 2') 274 self.assertEqual(response['message']['download_list'][0], 'min length is 2')
273 self.assertEqual(request.status_code, 400) 275 self.assertEqual(request.status_code, 400)
274 276
@@ -280,7 +282,7 @@ class Mat2APITestCase(unittest.TestCase):
280 headers={'content-type': 'application/json'} 282 headers={'content-type': 'application/json'}
281 ) 283 )
282 284
283 response = json.loads(request.data.decode('utf-8')) 285 response = request.get_json()
284 self.assertEqual(response['message']['download_list'][0]['0'][0]['file_name'][0], 'required field') 286 self.assertEqual(response['message']['download_list'][0]['0'][0]['file_name'][0], 'required field')
285 self.assertEqual(response['message']['download_list'][0]['0'][0]['key'][0], 'required field') 287 self.assertEqual(response['message']['download_list'][0]['0'][0]['key'][0], 'required field')
286 self.assertEqual(request.status_code, 400) 288 self.assertEqual(request.status_code, 400)
@@ -338,7 +340,7 @@ class Mat2APITestCase(unittest.TestCase):
338 headers={'content-type': 'application/json'} 340 headers={'content-type': 'application/json'}
339 ) 341 )
340 342
341 response = json.loads(request.data.decode('utf-8')) 343 response = request.get_json()
342 self.assertEqual(response['message']['download_list'][0], 'max length is 10') 344 self.assertEqual(response['message']['download_list'][0], 'max length is 10')
343 self.assertEqual(request.status_code, 400) 345 self.assertEqual(request.status_code, 400)
344 346
@@ -358,17 +360,18 @@ class Mat2APITestCase(unittest.TestCase):
358 data=json.dumps(post_body), 360 data=json.dumps(post_body),
359 headers={'content-type': 'application/json'} 361 headers={'content-type': 'application/json'}
360 ) 362 )
361 response = json.loads(request.data.decode('utf-8')) 363 response = request.get_json()
362 self.assertEqual('File not found', response['message']) 364 self.assertEqual('File not found', response['message'])
363 365
364 @patch('file_removal_scheduler.random.randint') 366 @patch('matweb.file_removal_scheduler.random.randint')
365 def test_api_upload_leftover(self, randint_mock): 367 def test_api_upload_leftover(self, randint_mock):
366 os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0' 368 os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'
367 app = main.create_app()
368 self.upload_folder = tempfile.mkdtemp() 369 self.upload_folder = tempfile.mkdtemp()
369 app.config.update( 370 app = main.create_app(
370 TESTING=True, 371 test_config={
371 UPLOAD_FOLDER=self.upload_folder 372 'TESTING': True,
373 'UPLOAD_FOLDER': self.upload_folder
374 }
372 ) 375 )
373 app = app.test_client() 376 app = app.test_client()
374 randint_mock.return_value = 1 377 randint_mock.return_value = 1
@@ -385,7 +388,7 @@ class Mat2APITestCase(unittest.TestCase):
385 'FcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="}', 388 'FcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="}',
386 headers={'content-type': 'application/json'} 389 headers={'content-type': 'application/json'}
387 ) 390 )
388 download_link = json.loads(request.data.decode('utf-8'))['download_link'] 391 download_link = request.get_json()['download_link']
389 request = app.get(download_link) 392 request = app.get(download_link)
390 self.assertEqual(code, request.status_code) 393 self.assertEqual(code, request.status_code)
391 394
diff --git a/test/test_file_removal_scheduler.py b/test/test_file_removal_scheduler.py
index 7f6771a..1210cb2 100644
--- a/test/test_file_removal_scheduler.py
+++ b/test/test_file_removal_scheduler.py
@@ -3,7 +3,9 @@ import tempfile
3from os import path, environ 3from os import path, environ
4import shutil 4import shutil
5 5
6import file_removal_scheduler 6from unittest.mock import patch
7
8from matweb import file_removal_scheduler
7import main 9import main
8 10
9 11
@@ -17,26 +19,28 @@ class Mat2WebTestCase(unittest.TestCase):
17 ) 19 )
18 self.app = app 20 self.app = app
19 21
20 def test_removal(self): 22 @patch('matweb.file_removal_scheduler.random.randint')
23 def test_removal(self, randint_mock):
21 filename = 'test_name.cleaned.jpg' 24 filename = 'test_name.cleaned.jpg'
22 environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0' 25 environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'
23 open(path.join(self.upload_folder, filename), 'a').close() 26 open(path.join(self.upload_folder, filename), 'a').close()
24 self.assertTrue(path.exists(path.join(self.upload_folder, ))) 27 self.assertTrue(path.exists(path.join(self.upload_folder, )))
25 for i in range(0, 11): 28 randint_mock.return_value = 0
26 file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER']) 29 file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
27 self.assertFalse(path.exists(path.join(self.upload_folder, filename))) 30 self.assertFalse(path.exists(path.join(self.upload_folder, filename)))
28 31
29 open(path.join(self.upload_folder, filename), 'a').close() 32 open(path.join(self.upload_folder, filename), 'a').close()
30 file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER']) 33 file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
31 self.assertTrue(path.exists(path.join(self.upload_folder, ))) 34 self.assertTrue(path.exists(path.join(self.upload_folder, )))
32 35
33 def test_non_removal(self): 36 @patch('matweb.file_removal_scheduler.random.randint')
37 def test_non_removal(self, randint_mock):
34 filename = u'i_should_no_be_removed.txt' 38 filename = u'i_should_no_be_removed.txt'
35 environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999999' 39 environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999999'
36 open(path.join(self.upload_folder, filename), 'a').close() 40 open(path.join(self.upload_folder, filename), 'a').close()
37 self.assertTrue(path.exists(path.join(self.upload_folder, filename))) 41 self.assertTrue(path.exists(path.join(self.upload_folder, filename)))
38 for i in range(0, 11): 42 randint_mock.return_value = 0
39 file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER']) 43 file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])
40 self.assertTrue(path.exists(path.join(self.upload_folder, filename))) 44 self.assertTrue(path.exists(path.join(self.upload_folder, filename)))
41 45
42 def tearDown(self): 46 def tearDown(self):
diff --git a/utils.py b/utils.py
deleted file mode 100644
index 023c838..0000000
--- a/utils.py
+++ /dev/null
@@ -1,33 +0,0 @@
1import os
2import hashlib
3
4
5def get_allow_origin_header_value():
6 return os.environ.get('MAT2_ALLOW_ORIGIN_WHITELIST', '*').split(" ")
7
8
9def hash_file(filepath: str) -> str:
10 sha256 = hashlib.sha256()
11 with open(filepath, 'rb') as f:
12 while True:
13 data = f.read(65536) # read the file by chunk of 64k
14 if not data:
15 break
16 sha256.update(data)
17 return sha256.hexdigest()
18
19
20def check_upload_folder(upload_folder):
21 if not os.path.exists(upload_folder):
22 os.mkdir(upload_folder)
23
24
25def return_file_created_response(output_filename, mime, key, meta, meta_after, download_link):
26 return {
27 'output_filename': output_filename,
28 'mime': mime,
29 'key': key,
30 'meta': meta,
31 'meta_after': meta_after,
32 'download_link': download_link
33 }