# Snapshot of modules/libfuzzy.py as removed by commit
# 6beeeebe3c43f0643e521139d3f8b1ff4a7f3059 ("Yara is cooler than Python",
# Julien Voisin, Thu, 5 Mar 2015 15:36:22 +0100; 98 lines deleted).
''' Ugly-pseudo-bindings to libfuzzy (used by ssdeep) to check
if a file is similar to a given list of samples
'''

import os
import ctypes
import pickle
import sys
import logging
logging.basicConfig(level=logging.DEBUG)

import scanmodule


SPAMSUM_LENGTH = 64
FUZZY_MAX_RESULT = SPAMSUM_LENGTH + SPAMSUM_LENGTH // 2 + 20


def main():
    ''' Build the scan module exposed by this file
        @ret A new FuzzyMatcher instance
    '''
    return FuzzyMatcher()


class FuzzyMatcher(scanmodule.ScanModule):
    ''' Scan module comparing the fuzzy hash of a file against the fuzzy
        hashes of known samples.

        NOTE(review): self.samples is never created in this file; it is
        presumably set up by scanmodule.ScanModule -- confirm.
    '''
    name = 'libfuzzy'

    def __init__(self, samples_path=None, persistence_path=None):
        ''' Bind to libfuzzy, then run the parent initialisation
            @param samples_path Not used by this constructor
            @param persistence_path Not used by this constructor
        '''
        self.__initialize_libfuzzy()
        super(FuzzyMatcher, self).__init__()

    def __initialize_libfuzzy(self):
        ''' Bind to libfuzzy thanks to ctypes.
            This will create the "fuzzy_hash_buf" and
            the "fuzzy_compare" methods.
            Exits the process with status 1 if libfuzzy.so cannot be loaded.
        '''
        try:
            fuzzy = ctypes.CDLL('libfuzzy.so')
        except OSError:
            print('[-] Please check that you installed libfuzzy')
            sys.exit(1)

        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
        self.__fuzzy_hash_buf.restype = ctypes.c_int
        self.__fuzzy_hash_buf.argtypes = [
            ctypes.c_char_p,  # buf
            ctypes.c_uint32,  # buf_len
            ctypes.c_char_p,  # result
        ]
        self.__fuzzy_compare = fuzzy.fuzzy_compare
        self.__fuzzy_compare.restype = ctypes.c_int
        self.__fuzzy_compare.argtypes = [
            ctypes.c_char_p,  # sig1
            ctypes.c_char_p,  # sig2
        ]

    def populate(self, path):
        ''' Computes fuzzy hashes of files under the given path,
            and store them in the dict self.samples with the form
            dict {name: fuzzy_hash}
            @param path Path containing the samples
        '''
        for root, _, filenames in os.walk(path):
            for filename in filenames:
                full_path = os.path.join(root, filename)
                self.samples[full_path] = self.__hash_from_file(full_path)

    def __hash_from_file(self, path):
        ''' Return the hash of the given file
            @param path Path to the file to hash
            @ret Fuzzy hash of the given file (empty if libfuzzy reported
                 an error, in which case a warning is logged)
        '''
        # Binary mode: the hash must be computed on the raw bytes, and
        # ctypes.c_char_p only accepts bytes on Python 3.
        with open(path, 'rb') as f:
            content = f.read()

        # Zero-filled buffer of FUZZY_MAX_RESULT characters plus the
        # terminating NUL.
        out = ctypes.create_string_buffer(FUZZY_MAX_RESULT + 1)
        # fuzzy_hash_buf returns zero on success, non-zero on error.
        if self.__fuzzy_hash_buf(content, len(content), out) != 0:
            logging.warning('libfuzzy failed to hash %s', path)
        return out.value

    def evaluate(self, path):
        ''' Compare the hash of the given path to every samples one.
            @param path Path to the file to evaluate
            @ret A sorted list of the form
                 [name, match_in_percent_superior_to_zero]
        '''
        fuzzy_hash = self.__hash_from_file(path)

        matches = []
        for sample_name, sample_hash in self.samples.items():
            score = self.__fuzzy_compare(fuzzy_hash, sample_hash)
            # fuzzy_compare returns -1 on error: keep only real matches.
            if score > 0:
                matches.append([sample_name, score])
        return sorted(matches, key=lambda match: match[1], reverse=True)

    def is_malware(self, path, threshold=90):
        ''' Tell whether the given file closely matches a known sample
            @param path Path to the file to check
            @param threshold Score (in percent) that the best match must
                   exceed for the file to be reported
            @ret True if the best match score is strictly above threshold
        '''
        max_score = 0
        fuzzy_hash = self.__hash_from_file(path)

        for sample_name, sample_hash in self.samples.items():
            score = self.__fuzzy_compare(fuzzy_hash, sample_hash)
            if score > max_score:
                # Remember the best score seen so far.
                max_score = score
                logging.info('fuzzy score for %s matches %s at %d%%',
                             path, sample_name, score)

        return max_score > threshold