From c301e472bd7fd79d675c5df089db0b16fd1e2cfe Mon Sep 17 00:00:00 2001
From: jfriedli
Date: Sun, 26 Apr 2020 09:50:14 -0700
Subject: Resolve "Use a HMAC instead of a hash"

---
 matweb/utils.py | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

(limited to 'matweb/utils.py')

diff --git a/matweb/utils.py b/matweb/utils.py
index 8dfff45..ec9b99c 100644
--- a/matweb/utils.py
+++ b/matweb/utils.py
@@ -12,15 +12,21 @@ def get_allow_origin_header_value():
     return os.environ.get('MAT2_ALLOW_ORIGIN_WHITELIST', '*').split(" ")
 
 
-def hash_file(filepath: str) -> str:
-    sha256 = hashlib.sha256()
+def hash_file(filepath: str, secret: str) -> str:
+    """
+    The goal of the hmac is to ONLY make the hashes unpredictable
+    :param filepath: Path of the file
+    :param secret: a server side generated secret
+    :return: digest, secret
+    """
+    mac = hmac.new(secret.encode(), None, hashlib.sha256)
     with open(filepath, 'rb') as f:
         while True:
             data = f.read(65536)  # read the file by chunk of 64k
             if not data:
                 break
-            sha256.update(data)
-    return sha256.hexdigest()
+            mac.update(data)
+    return mac.hexdigest()
 
 
 def check_upload_folder(upload_folder):
@@ -28,11 +34,20 @@ def check_upload_folder(upload_folder):
         os.mkdir(upload_folder)
 
 
-def return_file_created_response(output_filename, mime, key, meta, meta_after, download_link):
+def return_file_created_response(
+        output_filename: str,
+        mime: str,
+        key: str,
+        secret: str,
+        meta: list,
+        meta_after: list,
+        download_link: str
+) -> dict:
     return {
         'output_filename': output_filename,
         'mime': mime,
         'key': key,
+        'secret': secret,
         'meta': meta,
         'meta_after': meta_after,
         'download_link': download_link
@@ -65,9 +80,9 @@ def cleanup(parser, filepath, upload_folder):
     parser, _ = parser_factory.get_parser(parser.output_filename)
     meta_after = parser.get_meta()
     os.remove(filepath)
-
-    key = hash_file(os.path.join(upload_folder, output_filename))
-    return key, meta_after, output_filename
+    secret = os.urandom(32).hex()
+    key = hash_file(os.path.join(upload_folder, output_filename), secret)
+    return key, secret, meta_after, output_filename
 
 
 def get_file_paths(filename, upload_folder):
@@ -77,7 +92,7 @@ def get_file_paths(filename, upload_folder):
     return complete_path, filepath
 
 
-def is_valid_api_download_file(filename, key, upload_folder):
+def is_valid_api_download_file(filename: str, key: str, secret: str, upload_folder: str) -> [str, str]:
     if filename != secure_filename(filename):
         abort(400, message='Insecure filename')
 
@@ -86,6 +101,6 @@ def is_valid_api_download_file(filename, key, upload_folder):
     if not os.path.exists(complete_path):
         abort(404, message='File not found')
 
-    if hmac.compare_digest(hash_file(complete_path), key) is False:
+    if hmac.compare_digest(hash_file(complete_path, secret), key) is False:
         abort(400, message='The file hash does not match')
     return complete_path, filepath
-- 
cgit v1.3