# Recovered from a git patch: commit 807248f9343a4cabb48c3be1a512b27f6377e871
# ("First commit!", Julien Voisin, 2015-03-03), creating modules/libfuzzy.py.
''' Ugly-pseudo-bindings to libfuzzy (used by ssdeep) to check
if a file is similar to a given list of samples
'''

import os
import ctypes
import pickle
import sys
import logging
logging.basicConfig(level=logging.DEBUG)

import scanmodule


SPAMSUM_LENGTH = 64
# Size of the output buffer handed to fuzzy_hash_buf.
FUZZY_MAX_RESULT = SPAMSUM_LENGTH + SPAMSUM_LENGTH // 2 + 20


def main():
    ''' Module entry point.
        @ret A new FuzzyMatcher instance
    '''
    return FuzzyMatcher()


class FuzzyMatcher(scanmodule.ScanModule):
    ''' Scan module comparing the fuzzy hash of a file against the fuzzy
        hashes of known samples, using libfuzzy through ctypes.

        NOTE(review): self.samples is never created in this class; it is
        presumably provided by scanmodule.ScanModule -- confirm.
    '''
    name = 'libfuzzy'

    def __init__(self, samples_path=None, persistence_path=None):
        ''' Bind to libfuzzy, then run the base class initialisation.
            @param samples_path Accepted but not used by this class
            @param persistence_path Accepted but not used by this class
        '''
        # NOTE(review): both parameters are ignored here and are not
        # forwarded to ScanModule.__init__ -- confirm this is intended.
        self.__initialize_libfuzzy()
        super(FuzzyMatcher, self).__init__()

    def __initialize_libfuzzy(self):
        ''' Bind to libfuzzy thanks to ctypes.
            This will create the "fuzzy_hash_buf" and
            the "fuzzy_compare" methods.
            Exits the process with status 1 if libfuzzy.so cannot be loaded.
        '''
        try:
            fuzzy = ctypes.CDLL('libfuzzy.so')
        except OSError:
            print('[-] Please check that you installed libfuzzy')
            sys.exit(1)

        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
        self.__fuzzy_hash_buf.restype = ctypes.c_int
        self.__fuzzy_hash_buf.argtypes = [
            ctypes.c_char_p,  # buf
            ctypes.c_uint32,  # buf_len
            ctypes.c_char_p,  # result
        ]
        self.__fuzzy_compare = fuzzy.fuzzy_compare
        self.__fuzzy_compare.restype = ctypes.c_int
        self.__fuzzy_compare.argtypes = [
            ctypes.c_char_p,  # sig1
            ctypes.c_char_p,  # sig2
        ]

    def populate(self, path):
        ''' Computes fuzzy hashes of files under the given path,
            and store them in the dict self.samples with the form
            dict {name: fuzzy_hash}
            @param path Path containing the samples
        '''
        for root, _, filenames in os.walk(path):
            for filename in filenames:
                full_path = os.path.join(root, filename)
                self.samples[full_path] = self.__hash_from_file(full_path)

    def __hash_from_file(self, path):
        ''' Return the hash of the given file
            @param path Path to the file to hash
            @ret Fuzzy hash of the given file, as a byte string
        '''
        # Samples are arbitrary files: read raw bytes so that no newline
        # translation or text decoding alters what gets hashed.
        with open(path, 'rb') as f:
            content = f.read()

        # A bytes initialiser keeps the original buffer size and is accepted
        # by ctypes on both Python 2 and Python 3.
        out = ctypes.create_string_buffer(b'\x00' * FUZZY_MAX_RESULT)
        if self.__fuzzy_hash_buf(content, len(content), out) != 0:
            # Non-zero means libfuzzy reported an error; keep going
            # (best effort, as before) but leave a trace.
            logging.warning('libfuzzy failed to hash %s', path)
        return out.value

    def evaluate(self, path):
        ''' Compare the hash of the given path to every samples one.
            @param path Path to the file to evaluate
            @ret A sorted list of the form
                 [name, match_in_percent_superior_to_zero],
                 best match first
        '''
        fuzzy_hash = self.__hash_from_file(path)

        matches = []
        for sample_name in self.samples:
            score = self.__fuzzy_compare(fuzzy_hash, self.samples[sample_name])
            # Strictly positive only: 0 is "no match" and a negative value
            # is libfuzzy's error indicator.
            if score > 0:
                matches.append([sample_name, score])
        return sorted(matches, key=lambda match: match[1], reverse=True)

    def is_malware(self, path):
        ''' Tell whether the given file closely matches a known sample.
            @param path Path to the file to check
            @ret True if the best match against self.samples is above 90%
        '''
        max_score = 0
        fuzzy_hash = self.__hash_from_file(path)

        for sample_name in self.samples:
            score = self.__fuzzy_compare(fuzzy_hash, self.samples[sample_name])
            if score > max_score:
                # Remember the best score seen so far.
                max_score = score
                logging.info('fuzzy score for %s matches %s at %d%%',
                             path, sample_name, score)

        return max_score > 90