''' Ugly pseudo-bindings to libfuzzy (used by ssdeep), to check if a file is
similar to a given list of samples '''

import os
import ctypes
import pickle
import sys
import logging

logging.basicConfig(level=logging.DEBUG)

import scanmodule

SPAMSUM_LENGTH = 64
# Size of the buffer handed to fuzzy_hash_buf to receive the resulting hash.
FUZZY_MAX_RESULT = SPAMSUM_LENGTH + SPAMSUM_LENGTH // 2 + 20

# A file is reported as malware when its best score is strictly above this.
MALWARE_SCORE_THRESHOLD = 90


def main():
    ''' Module entry point

        @ret A new FuzzyMatcher instance
    '''
    return FuzzyMatcher()


class FuzzyMatcher(scanmodule.ScanModule):
    ''' Scan module comparing the fuzzy hash of a file against the fuzzy
        hashes of known samples, through libfuzzy.

        NOTE(review): self.samples is never created in this file; it is
        presumably set up by scanmodule.ScanModule -- confirm.
    '''
    name = 'libfuzzy'

    def __init__(self, samples_path=None, persistence_path=None):
        ''' Bind to libfuzzy, then run the base class initialisation

            NOTE(review): samples_path and persistence_path are accepted but
            not used here, nor forwarded to the base class -- confirm that
            this is intended.
        '''
        self.__initialize_libfuzzy()
        super(FuzzyMatcher, self).__init__()

    def __initialize_libfuzzy(self):
        ''' Bind to libfuzzy thanks to ctypes.
            This will create the "fuzzy_hash_buf" and the
            "fuzzy_compare" methods

            Exits the process with status 1 if libfuzzy.so can't be loaded.
        '''
        try:
            fuzzy = ctypes.CDLL('libfuzzy.so')
        except OSError:
            print('[-] Please check that you installed libfuzzy')
            sys.exit(1)

        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
        self.__fuzzy_hash_buf.restype = ctypes.c_int
        self.__fuzzy_hash_buf.argtypes = [
            ctypes.c_char_p,  # buf
            ctypes.c_uint32,  # buf_len
            ctypes.c_char_p,  # result
        ]

        self.__fuzzy_compare = fuzzy.fuzzy_compare
        self.__fuzzy_compare.restype = ctypes.c_int
        self.__fuzzy_compare.argtypes = [
            ctypes.c_char_p,  # sig1
            ctypes.c_char_p,  # sig2
        ]

    def populate(self, path):
        ''' Computes fuzzy hashes of files under the given path,
            and store them in the dict self.samples
            with the form dict {name: fuzzy_hash}

            @param path Path containing the samples
        '''
        for root, _, filenames in os.walk(path):
            for filename in filenames:
                full_path = os.path.join(root, filename)
                self.samples[full_path] = self.__hash_from_file(full_path)

    def __hash_from_file(self, path):
        ''' Return the hash of the given file

            @param path Path to the file to hash
            @ret Fuzzy hash of the given file
        '''
        # Binary mode: the raw bytes are what gets hashed, and the length
        # given to libfuzzy has to be a byte count, not a character count.
        with open(path, 'rb') as f:
            content = f.read()

        # Passing a size gives a zero-filled buffer on Python 2 and 3 alike.
        out = ctypes.create_string_buffer(FUZZY_MAX_RESULT)
        status = self.__fuzzy_hash_buf(content, len(content), out)
        if status != 0:
            # Still return whatever is in the buffer, as before, but don't
            # hide the failure.
            logging.warning('fuzzy_hash_buf returned %d for %s', status, path)
        return out.value

    def evaluate(self, path):
        ''' Compare the hash of the given path to every samples one.

            @param path Path to the file to compare to the samples
            @ret A sorted list (best match first) of the form
                 [name, match_in_percent_superior_to_zero]
        '''
        fuzzy_hash = self.__hash_from_file(path)
        matches = list()
        for name in self.samples:
            score = self.__fuzzy_compare(fuzzy_hash, self.samples[name])
            # "> 0" rather than plain truthiness, so that a negative (error)
            # return from fuzzy_compare is not reported as a match.
            if score > 0:
                matches.append([name, score])
        return sorted(matches, key=lambda match: match[1], reverse=True)

    def is_malware(self, path):
        ''' Check if the given file closely matches one of the samples

            @param path Path to the file to check
            @ret True if the best score against the samples is above
                 MALWARE_SCORE_THRESHOLD, False otherwise
        '''
        max_score = 0
        fuzzy_hash = self.__hash_from_file(path)
        for name in self.samples:
            score = self.__fuzzy_compare(fuzzy_hash, self.samples[name])
            if score > max_score:
                # Keep track of the best score seen so far.
                max_score = score
                logging.info('fuzzy score for %s matches %s at %d%%',
                             path, name, score)
        return max_score > MALWARE_SCORE_THRESHOLD