summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md20
-rw-r--r--docker-compose.yml1
-rw-r--r--file_removal_scheduler.py26
-rw-r--r--main.py20
-rw-r--r--requirements.txt2
-rw-r--r--templates/download.html2
-rw-r--r--test/test.py53
-rw-r--r--test/test_api.py49
-rw-r--r--test/test_file_removal_scheduler.py48
9 files changed, 202 insertions, 19 deletions
diff --git a/README.md b/README.md
index a10f558..22ff44d 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,12 @@ serious, yet.
16There is a demo instance deployed at [mat2-web.dustri.org](https://mat2-web.dustri.org). 16There is a demo instance deployed at [mat2-web.dustri.org](https://mat2-web.dustri.org).
17Please don't upload any sensitive files to it. 17Please don't upload any sensitive files to it.
18 18
19# Vue Frontend
20![Frontend GIF Preview](https://0xacab.org/jfriedli/mat2-quasar-frontend/raw/2dd5de537088d67fe4167bf5b2e1f5dacf2fa537/mat-frontend.gif?inline=true)
21There is a SPA Frontend available at https://0xacab.org/jfriedli/mat2-quasar-frontend. It consumes
22the RESTful API of this project. As a fallback for non-JS users it redirects to this web app.
23To set it up, check out the [Readme](https://0xacab.org/jfriedli/mat2-quasar-frontend/blob/master/README.md).
24
19# How to deploy it? 25# How to deploy it?
20 26
21mat2 is available in [Debian stable](https://packages.debian.org/stable/mat2). 27mat2 is available in [Debian stable](https://packages.debian.org/stable/mat2).
@@ -46,7 +52,11 @@ Note that you can add multiple hosts from which you want to accept API requests.
46a space. 52a space.
47**IMPORTANT:** The default value if the variable is not set is: `Access-Control-Allow-Origin: *` 53**IMPORTANT:** The default value if the variable is not set is: `Access-Control-Allow-Origin: *`
48 54
49Configure another environment variable: `MAT2_MAX_FILES_BULK_DOWNLOAD=10` 55Configure the following environment variables:
56
57 - `MAT2_MAX_FILES_BULK_DOWNLOAD=10` Max number of files that can be grouped for a bulk download.
58 - `MAT2_MAX_FILE_AGE_FOR_REMOVAL=900` Seconds a file in the upload folder is kept.
59 After that it will be deleted. Default `15 * 60`
50 60
51This specifies the max number of files that can be bulk downloaded using the api. 61This specifies the max number of files that can be bulk downloaded using the api.
52Note: Each file has a max file size of 16mb 62Note: Each file has a max file size of 16mb
@@ -60,10 +70,6 @@ systemctl restart nginx/apache/…
60 70
61It should now be working. 71It should now be working.
62 72
63You should add `find /var/www/mat2-web/uploads/ -type f -mtime +1 -exec rm {} \;`
64in a crontab to remove files that people might have uploaded but never
65downloaded.
66
67# Deploy via Ansible 73# Deploy via Ansible
68 74
69If you happen to be using [Ansible](https://www.ansible.com/), there's an 75If you happen to be using [Ansible](https://www.ansible.com/), there's an
@@ -86,10 +92,6 @@ https://0xacab.org/jvoisin/mat2-web/container_registry
86Example: 92Example:
87`docker run -p 80:80 -d -e MAT2_ALLOW_ORIGIN_WHITELIST='https://myhost1.org' registry.0xacab.org/jvoisin/mat2-web:latest` 93`docker run -p 80:80 -d -e MAT2_ALLOW_ORIGIN_WHITELIST='https://myhost1.org' registry.0xacab.org/jvoisin/mat2-web:latest`
88 94
89Make sure to add
90`find /var/www/mat2-web/uploads/ -type f -mtime +1 -exec rm {} \;` as cron job
91run inside the container.
92
93# Development 95# Development
94Install docker and docker-compose and then run `docker-compose up` to setup 96Install docker and docker-compose and then run `docker-compose up` to setup
95the docker dev environment. Mat2-web is now accessible on your host machine at `localhost:5000`. 97the docker dev environment. Mat2-web is now accessible on your host machine at `localhost:5000`.
diff --git a/docker-compose.yml b/docker-compose.yml
index e925447..36678c0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,6 +9,7 @@ services:
9 - FLASK_ENV=development 9 - FLASK_ENV=development
10 - MAT2_ALLOW_ORIGIN_WHITELIST=* 10 - MAT2_ALLOW_ORIGIN_WHITELIST=*
11 - MAT2_MAX_FILES_BULK_DOWNLOAD=10 11 - MAT2_MAX_FILES_BULK_DOWNLOAD=10
12 - MAT2_MAX_FILE_AGE_FOR_REMOVAL=60
12 ports: 13 ports:
13 - "5000:5000" 14 - "5000:5000"
14 volumes: 15 volumes:
diff --git a/file_removal_scheduler.py b/file_removal_scheduler.py
new file mode 100644
index 0000000..2ce7912
--- /dev/null
+++ b/file_removal_scheduler.py
@@ -0,0 +1,26 @@
1import glob
2import time
3import sys
4import os
5import random
6
7
def run_file_removal_job(upload_folder_path):
    """Occasionally sweep the upload folder for files that are too old.

    The sweep is triggered at random so that it does not run on every
    request: ``random.randint(0, 10)`` is inclusive on both ends, so about
    one call in eleven actually inspects the folder.

    :param upload_folder_path: directory whose direct entries are checked.
    """
    # Guard clause: most calls return immediately without touching the disk.
    if random.randint(0, 10) != 0:
        return

    pattern = upload_folder_path + '/*'
    for candidate in glob.glob(pattern):
        delete_file_when_too_old(candidate)
12
13
def delete_file_when_too_old(filepath):
    """Delete ``filepath`` if it was last modified too long ago.

    The maximum age, in seconds, is read from the
    ``MAT2_MAX_FILE_AGE_FOR_REMOVAL`` environment variable and defaults to
    15 minutes (``15 * 60``).

    This function is best-effort: it is meant to be called while serving a
    request, so a failed clean-up must never abort the caller. A file that
    has already disappeared is ignored silently; any other OS error is
    reported on stderr and the function returns normally.

    :param filepath: path of the file to inspect.
    """
    try:
        file_mod_time = os.stat(filepath).st_mtime
    except FileNotFoundError:
        # The file vanished between listing and inspection (for example a
        # concurrent download removed it): there is nothing left to do.
        return
    except OSError:
        print('Automatic File Removal failed on file: ' + str(filepath),
              file=sys.stderr)
        return

    # time in seconds since the last modification of the file
    file_age = time.time() - file_mod_time
    max_age = int(os.environ.get('MAT2_MAX_FILE_AGE_FOR_REMOVAL', 15 * 60))

    # Only files strictly older than the configured limit are removed.
    if file_age <= max_age:
        return

    try:
        os.remove(filepath)
    except FileNotFoundError:
        # Someone else deleted it first, which is the outcome we wanted.
        pass
    except OSError:
        # Report and carry on: exiting here would take the whole request
        # (or worker process) down because of a housekeeping failure.
        print('Automatic File Removal failed on file: ' + str(filepath),
              file=sys.stderr)
diff --git a/main.py b/main.py
index bd1a0c3..4cdb43e 100644
--- a/main.py
+++ b/main.py
@@ -10,6 +10,7 @@ import zipfile
10 10
11from cerberus import Validator 11from cerberus import Validator
12import utils 12import utils
13import file_removal_scheduler
13from libmat2 import parser_factory 14from libmat2 import parser_factory
14from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request 15from flask import Flask, flash, request, redirect, url_for, render_template, send_from_directory, after_this_request
15from flask_restful import Resource, Api, reqparse, abort 16from flask_restful import Resource, Api, reqparse, abort
@@ -25,31 +26,36 @@ def create_app(test_config=None):
25 app.config['UPLOAD_FOLDER'] = './uploads/' 26 app.config['UPLOAD_FOLDER'] = './uploads/'
26 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB 27 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB
27 app.config['CUSTOM_TEMPLATES_DIR'] = 'custom_templates' 28 app.config['CUSTOM_TEMPLATES_DIR'] = 'custom_templates'
28 app.config.from_object('config') # optionally load settings from config.py 29 # optionally load settings from config.py
30 app.config.from_object('config')
31
32 if test_config is not None:
33 app.config.update(test_config)
29 34
30 app.jinja_loader = jinja2.ChoiceLoader([ # type: ignore 35 app.jinja_loader = jinja2.ChoiceLoader([ # type: ignore
31 jinja2.FileSystemLoader(app.config['CUSTOM_TEMPLATES_DIR']), 36 jinja2.FileSystemLoader(app.config['CUSTOM_TEMPLATES_DIR']),
32 app.jinja_loader, 37 app.jinja_loader,
33 ]) 38 ])
34 39
35 api = Api(app) 40 api = Api(app)
36 CORS(app, resources={r"/api/*": {"origins": utils.get_allow_origin_header_value()}}) 41 CORS(app, resources={r"/api/*": {"origins": utils.get_allow_origin_header_value()}})
37 42
38 @app.route('/download/<string:key>/<string:filename>') 43 @app.route('/download/<string:key>/<string:filename>')
39 def download_file(key: str, filename:str): 44 def download_file(key: str, filename: str):
40 if filename != secure_filename(filename): 45 if filename != secure_filename(filename):
41 return redirect(url_for('upload_file')) 46 return redirect(url_for('upload_file'))
42 47
43 complete_path, filepath = get_file_paths(filename) 48 complete_path, filepath = get_file_paths(filename)
49 file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER'])
44 50
45 if not os.path.exists(complete_path): 51 if not os.path.exists(complete_path):
46 return redirect(url_for('upload_file')) 52 return redirect(url_for('upload_file'))
47 if hmac.compare_digest(utils.hash_file(complete_path), key) is False: 53 if hmac.compare_digest(utils.hash_file(complete_path), key) is False:
48 return redirect(url_for('upload_file')) 54 return redirect(url_for('upload_file'))
49
50 @after_this_request 55 @after_this_request
51 def remove_file(response): 56 def remove_file(response):
52 os.remove(complete_path) 57 if os.path.exists(complete_path):
58 os.remove(complete_path)
53 return response 59 return response
54 return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True) 60 return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True)
55 61
@@ -176,9 +182,11 @@ def create_app(test_config=None):
176 complete_path, filepath = is_valid_api_download_file(filename, key) 182 complete_path, filepath = is_valid_api_download_file(filename, key)
177 # Make sure the file is NOT deleted on HEAD requests 183 # Make sure the file is NOT deleted on HEAD requests
178 if request.method == 'GET': 184 if request.method == 'GET':
185 file_removal_scheduler.run_file_removal_job(app.config['UPLOAD_FOLDER'])
179 @after_this_request 186 @after_this_request
180 def remove_file(response): 187 def remove_file(response):
181 os.remove(complete_path) 188 if os.path.exists(complete_path):
189 os.remove(complete_path)
182 return response 190 return response
183 191
184 return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True) 192 return send_from_directory(app.config['UPLOAD_FOLDER'], filepath, as_attachment=True)
diff --git a/requirements.txt b/requirements.txt
index 42b75e2..61f9711 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,4 @@ mat2==0.9.0
5flask==1.0.3 5flask==1.0.3
6Flask-RESTful==0.3.7 6Flask-RESTful==0.3.7
7Flask-Cors==3.0.8 7Flask-Cors==3.0.8
8Cerberus==1.3.1 \ No newline at end of file 8Cerberus==1.3.1
diff --git a/templates/download.html b/templates/download.html
index c6a4f07..e39b16b 100644
--- a/templates/download.html
+++ b/templates/download.html
@@ -14,7 +14,7 @@ mat2 <b>could not</b> remove all the metadata from <pre>{{ filename }}</pre>, th
14 </ul> 14 </ul>
15{%endif %} 15{%endif %}
16</p> 16</p>
17<a class="button button-primary" download href='{{ url_for('download_file', key=key, filename=filename) }}'>⇩ Download cleaned file</a> 17<a class="button button-primary" href='{{ url_for('download_file', key=key, filename=filename) }}'>⇩ Download cleaned file</a>
18 18
19<hr/> 19<hr/>
20 20
diff --git a/test/test.py b/test/test.py
index 34245d9..a7e6627 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,9 +1,12 @@
1import base64
1import unittest 2import unittest
2import tempfile 3import tempfile
3import shutil 4import shutil
4import io 5import io
5import os 6import os
6 7
8from unittest.mock import patch
9
7import main 10import main
8 11
9 12
@@ -62,6 +65,33 @@ class Mat2WebTestCase(unittest.TestCase):
62 rv.data) 65 rv.data)
63 self.assertEqual(rv.status_code, 200) 66 self.assertEqual(rv.status_code, 200)
64 67
68 def test_get_upload_no_selected_file(self):
69 rv = self.app.post('/',
70 data=dict(
71 file=(io.BytesIO(b""), ''),
72 ), follow_redirects=True)
73 self.assertIn(b'No selected file',
74 rv.data)
75 self.assertEqual(rv.status_code, 200)
76
77 def test_failed_cleaning(self):
78 zip_file_bytes = base64.b64decode(
79 'UEsDBBQACAAIAPicPE8AAAAAAAAAAAAAAAAXACAAZmFpbGluZy5ub3Qtd29ya2luZy1le'
80 'HRVVA0AB+Saj13kmo9d5JqPXXV4CwABBOkDAAAE6QMAAAMAUEsHCAAAAAACAAAAAAAAAFBL'
81 'AwQUAAgACAD6nDxPAAAAAAAAAAAAAAAACQAgAHRlc3QuanNvblVUDQAH6JqPXeiaj13omo9d'
82 'dXgLAAEE6QMAAATpAwAAAwBQSwcIAAAAAAIAAAAAAAAAUEsBAhQDFAAIAAgA+Jw8TwAAAAACA'
83 'AAAAAAAABcAIAAAAAAAAAAAAKSBAAAAAGZhaWxpbmcubm90LXdvcmtpbmctZXh0VVQNAAfkmo9'
84 'd5JqPXeSaj111eAsAAQTpAwAABOkDAABQSwECFAMUAAgACAD6nDxPAAAAAAIAAAAAAAAACQAgA'
85 'AAAAAAAAAAApIFnAAAAdGVzdC5qc29uVVQNAAfomo9d6JqPXeiaj111eAsAAQTpAwAABOkDAAB'
86 'QSwUGAAAAAAIAAgC8AAAAwAAAAAAA'
87 )
88 rv = self.app.post('/',
89 data=dict(
90 file=(io.BytesIO(zip_file_bytes), 'test.zip'),
91 ), follow_redirects=True)
92 self.assertIn(b'Unable to clean',rv.data)
93 self.assertEqual(rv.status_code, 200)
94
65 def test_get_upload_no_file_name(self): 95 def test_get_upload_no_file_name(self):
66 rv = self.app.post('/', 96 rv = self.app.post('/',
67 data=dict( 97 data=dict(
@@ -97,6 +127,29 @@ class Mat2WebTestCase(unittest.TestCase):
97 rv = self.app.get('/download/70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt') 127 rv = self.app.get('/download/70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt')
98 self.assertEqual(rv.status_code, 302) 128 self.assertEqual(rv.status_code, 302)
99 129
130 @patch('file_removal_scheduler.random.randint')
131 def test_upload_leftover(self, randint_mock):
132 randint_mock.return_value = 0
133 os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'
134 app = main.create_app()
135 self.upload_folder = tempfile.mkdtemp()
136 app.config.update(
137 TESTING=True,
138 UPLOAD_FOLDER=self.upload_folder
139 )
140 app = app.test_client()
141
142 request = self.app.post('/',
143 data=dict(
144 file=(io.BytesIO(b"Some text"), 'test.txt'),
145 ), follow_redirects=True)
146 self.assertEqual(request.status_code, 200)
147 request = app.get(
148 b'/download/4c2e9e6da31a64c70623619c449a040968cdbea85945bf384fa30ed2d5d24fa3/test.cleaned.txt'
149 )
150 self.assertEqual(302, request.status_code)
151 os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999'
152
100 153
101if __name__ == '__main__': 154if __name__ == '__main__':
102 unittest.main() 155 unittest.main()
diff --git a/test/test_api.py b/test/test_api.py
index de297c4..3074bd5 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -4,9 +4,10 @@ import json
4import os 4import os
5import shutil 5import shutil
6import zipfile 6import zipfile
7
8from six import BytesIO 7from six import BytesIO
9 8
9from unittest.mock import patch
10
10import main 11import main
11 12
12 13
@@ -122,6 +123,23 @@ class Mat2APITestCase(unittest.TestCase):
122 rv = self.app.get('/api/extension', headers={'Origin': 'origin1.gnu'}) 123 rv = self.app.get('/api/extension', headers={'Origin': 'origin1.gnu'})
123 self.assertEqual(rv.headers['Access-Control-Allow-Origin'], 'origin1.gnu') 124 self.assertEqual(rv.headers['Access-Control-Allow-Origin'], 'origin1.gnu')
124 125
126 def test_api_cleaning_failed(self):
127 request = self.app.post('/api/upload',
128 data='{"file_name": "test_name.zip", '
129 '"file": "UEsDBBQACAAIAPicPE8AAAAAAAAAAAAAAAAXACAAZmFpbGluZy5ub3Qt'
130 'd29ya2luZy1leHRVVA0AB+Saj13kmo9d5JqPXXV4CwABBOkDAAAE6QMAAAMAUEsHCAAA'
131 'AAACAAAAAAAAAFBLAwQUAAgACAD6nDxPAAAAAAAAAAAAAAAACQAgAHRlc3QuanNvblVUD'
132 'QAH6JqPXeiaj13omo9ddXgLAAEE6QMAAATpAwAAAwBQSwcIAAAAAAIAAAAAAAAAUEsBAhQD'
133 'FAAIAAgA+Jw8TwAAAAACAAAAAAAAABcAIAAAAAAAAAAAAKSBAAAAAGZhaWxpbmcubm90LXd'
134 'vcmtpbmctZXh0VVQNAAfkmo9d5JqPXeSaj111eAsAAQTpAwAABOkDAABQSwECFAMUAAgACAD6'
135 'nDxPAAAAAAIAAAAAAAAACQAgAAAAAAAAAAAApIFnAAAAdGVzdC5qc29uVVQNAAfomo9d6JqPXe'
136 'iaj111eAsAAQTpAwAABOkDAABQSwUGAAAAAAIAAgC8AAAAwAAAAAAA"}',
137 headers={'content-type': 'application/json'}
138 )
139 error = json.loads(request.data.decode('utf-8'))['message']
140 self.assertEqual(error, 'Unable to clean application/zip')
141
142
125 def test_api_download(self): 143 def test_api_download(self):
126 request = self.app.post('/api/upload', 144 request = self.app.post('/api/upload',
127 data='{"file_name": "test_name.jpg", ' 145 data='{"file_name": "test_name.jpg", '
@@ -263,7 +281,6 @@ class Mat2APITestCase(unittest.TestCase):
263 ) 281 )
264 282
265 response = json.loads(request.data.decode('utf-8')) 283 response = json.loads(request.data.decode('utf-8'))
266 print(response)
267 self.assertEqual(response['message']['download_list'][0]['0'][0]['file_name'][0], 'required field') 284 self.assertEqual(response['message']['download_list'][0]['0'][0]['file_name'][0], 'required field')
268 self.assertEqual(response['message']['download_list'][0]['0'][0]['key'][0], 'required field') 285 self.assertEqual(response['message']['download_list'][0]['0'][0]['key'][0], 'required field')
269 self.assertEqual(request.status_code, 400) 286 self.assertEqual(request.status_code, 400)
@@ -344,6 +361,34 @@ class Mat2APITestCase(unittest.TestCase):
344 response = json.loads(request.data.decode('utf-8')) 361 response = json.loads(request.data.decode('utf-8'))
345 self.assertEqual('File not found', response['message']) 362 self.assertEqual('File not found', response['message'])
346 363
364 @patch('file_removal_scheduler.random.randint')
365 def test_api_upload_leftover(self, randint_mock):
366 os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '0'
367 app = main.create_app()
368 self.upload_folder = tempfile.mkdtemp()
369 app.config.update(
370 TESTING=True,
371 UPLOAD_FOLDER=self.upload_folder
372 )
373 app = app.test_client()
374 randint_mock.return_value = 1
375 self.upload_download_test_jpg_and_assert_response_code(app, 200)
376 randint_mock.return_value = 0
377 self.upload_download_test_jpg_and_assert_response_code(app, 404)
378
379 os.environ['MAT2_MAX_FILE_AGE_FOR_REMOVAL'] = '9999'
380
381 def upload_download_test_jpg_and_assert_response_code(self, app, code):
382 request = app.post('/api/upload',
383 data='{"file_name": "test_name.jpg", '
384 '"file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAf'
385 'FcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="}',
386 headers={'content-type': 'application/json'}
387 )
388 download_link = json.loads(request.data.decode('utf-8'))['download_link']
389 request = app.get(download_link)
390 self.assertEqual(code, request.status_code)
391
347 392
348if __name__ == '__main__': 393if __name__ == '__main__':
349 unittest.main() 394 unittest.main()
diff --git a/test/test_file_removal_scheduler.py b/test/test_file_removal_scheduler.py
new file mode 100644
index 0000000..7f6771a
--- /dev/null
+++ b/test/test_file_removal_scheduler.py
@@ -0,0 +1,48 @@
1import unittest
2import tempfile
3from os import path, environ
4import shutil
5
6import file_removal_scheduler
7import main
8
9
class Mat2WebTestCase(unittest.TestCase):
    """Tests for the randomly triggered upload-folder clean-up job."""

    def setUp(self):
        # Every test gets its own empty upload folder.
        self.upload_folder = tempfile.mkdtemp()
        app = main.create_app()
        app.config.update(
            TESTING=True,
            UPLOAD_FOLDER=self.upload_folder
        )
        self.app = app

    def _run_job(self, randint_value, max_age):
        """Run the removal job once with a fixed random draw and max age.

        ``random.randint`` is patched so the outcome does not depend on
        chance (the job only sweeps when the draw is 0), and the environment
        variable is patched so it is restored after the call instead of
        leaking into other tests.
        """
        from unittest import mock

        with mock.patch.dict(environ, {'MAT2_MAX_FILE_AGE_FOR_REMOVAL': max_age}), \
                mock.patch('file_removal_scheduler.random.randint',
                           return_value=randint_value):
            file_removal_scheduler.run_file_removal_job(self.app.config['UPLOAD_FOLDER'])

    def test_removal(self):
        filepath = path.join(self.upload_folder, 'test_name.cleaned.jpg')
        open(filepath, 'a').close()
        self.assertTrue(path.exists(filepath))

        # Draw of 0 triggers the sweep; a max age of 0 makes any file too old.
        self._run_job(randint_value=0, max_age='0')
        self.assertFalse(path.exists(filepath))

        # Any other draw skips the sweep, so the file must survive.
        open(filepath, 'a').close()
        self._run_job(randint_value=1, max_age='0')
        self.assertTrue(path.exists(filepath))

    def test_non_removal(self):
        filepath = path.join(self.upload_folder, u'i_should_no_be_removed.txt')
        open(filepath, 'a').close()
        self.assertTrue(path.exists(filepath))

        # The sweep runs, but the file is far younger than the max age.
        self._run_job(randint_value=0, max_age='9999999')
        self.assertTrue(path.exists(filepath))

    def tearDown(self):
        shutil.rmtree(self.upload_folder)
44
45
46if __name__ == '__main__':
47 unittest.main()
48