diff options
Diffstat (limited to 'modules/libfuzzy.py')
| -rw-r--r-- | modules/libfuzzy.py | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/modules/libfuzzy.py b/modules/libfuzzy.py new file mode 100644 index 0000000..a0d3f15 --- /dev/null +++ b/modules/libfuzzy.py | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | ''' Ugly-pseudo-bindings to libfuzzy (used by ssdeep) to check | ||
| 2 | if a file is similar to a given list of samples | ||
| 3 | ''' | ||
| 4 | |||
| 5 | import os | ||
| 6 | import ctypes | ||
| 7 | import pickle | ||
| 8 | import sys | ||
| 9 | import logging | ||
| 10 | logging.basicConfig(level=logging.DEBUG) | ||
| 11 | |||
| 12 | import scanmodule | ||
| 13 | |||
| 14 | |||
# Sizing constants for the result buffer handed to libfuzzy.
# NOTE(review): these appear to mirror the macros of the same name in
# libfuzzy's fuzzy.h -- confirm against the installed header.
SPAMSUM_LENGTH = 64
# Evaluates to 116 with the value above.
FUZZY_MAX_RESULT = SPAMSUM_LENGTH + (SPAMSUM_LENGTH // 2) + 20
| 17 | |||
def main():
    ''' Module entry point.
        @ret A freshly constructed FuzzyMatcher instance
    '''
    matcher = FuzzyMatcher()
    return matcher
| 20 | |||
class FuzzyMatcher(scanmodule.ScanModule):
    ''' Scan module comparing the fuzzy hash of a file against the fuzzy
        hashes of a set of samples, using libfuzzy through ctypes.
    '''
    name = 'libfuzzy'

    def __init__(self, samples_path=None, persistence_path=None):
        ''' Bind to libfuzzy, then run the ScanModule initialisation.
            @param samples_path Currently unused by this constructor
            @param persistence_path Currently unused by this constructor
        '''
        # NOTE(review): self.samples is read and written by the methods below
        # but never assigned in this class; presumably it is provided by
        # scanmodule.ScanModule -- confirm.
        self.__initialize_libfuzzy()
        super(FuzzyMatcher, self).__init__()

    def __initialize_libfuzzy(self):
        ''' Bind to libfuzzy thanks to ctypes.
            This will create the "fuzzy_hash_buf" and
            the "fuzzy_compare" methods.
            Exits the process with status 1 if libfuzzy.so cannot be loaded.
        '''
        try:
            fuzzy = ctypes.CDLL('libfuzzy.so')
        except OSError:
            print('[-] Please check that you installed libfuzzy')
            sys.exit(1)

        # int fuzzy_hash_buf(buf, buf_len, result)
        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
        self.__fuzzy_hash_buf.restype = ctypes.c_int
        self.__fuzzy_hash_buf.argtypes = [
            ctypes.c_char_p,  # buf
            ctypes.c_uint32,  # buf_len
            ctypes.c_char_p,  # result
        ]

        # int fuzzy_compare(sig1, sig2)
        self.__fuzzy_compare = fuzzy.fuzzy_compare
        self.__fuzzy_compare.restype = ctypes.c_int
        self.__fuzzy_compare.argtypes = [
            ctypes.c_char_p,  # sig1
            ctypes.c_char_p,  # sig2
        ]

    def populate(self, path):
        ''' Computes fuzzy hashes of files under the given path,
            and store them in the dict self.samples with the form
            dict {name: fuzzy_hash}
            @param path Path containing the samples
        '''
        for root, _, filenames in os.walk(path):
            for filename in filenames:
                full_path = os.path.join(root, filename)
                self.samples[full_path] = self.__hash_from_file(full_path)

    def __hash_from_file(self, path):
        ''' Return the hash of the given file
            @param path Path to the file to hash
            @ret Fuzzy hash of the given file, as returned by libfuzzy
                 (empty if libfuzzy reported a failure)
        '''
        # Binary mode: the hash must be computed on the raw bytes, and
        # ctypes.c_char_p only accepts bytes on Python 3.
        with open(path, 'rb') as f:
            content = f.read()

        # Zero-filled output buffer that libfuzzy writes the signature into.
        out = ctypes.create_string_buffer(FUZZY_MAX_RESULT)
        # fuzzy_hash_buf returns non-zero on error (see libfuzzy's fuzzy.h);
        # report it instead of silently handing back an empty signature.
        if self.__fuzzy_hash_buf(content, len(content), out) != 0:
            logging.warning('libfuzzy failed to hash %s', path)
        return out.value

    def evaluate(self, path):
        ''' Compare the hash of the given path to every samples one.
            @param path Path to the file to compare against the samples
            @ret A list of [name, match_in_percent_superior_to_zero],
                 sorted by decreasing score
        '''
        fuzzy_hash = self.__hash_from_file(path)

        matches = []
        for sample_path, sample_hash in self.samples.items():
            score = self.__fuzzy_compare(fuzzy_hash, sample_hash)
            # Strictly positive only: 0 means "no match" and a negative
            # value is libfuzzy's error indicator, neither is a match.
            if score > 0:
                matches.append([sample_path, score])
        return sorted(matches, key=lambda match: match[1], reverse=True)

    def is_malware(self, path):
        ''' Tell whether the given file closely matches one of the samples.
            @param path Path to the file to check
            @ret True if the best score against the samples is above 90
        '''
        max_score = 0
        fuzzy_hash = self.__hash_from_file(path)

        for sample_path, sample_hash in self.samples.items():
            score = self.__fuzzy_compare(fuzzy_hash, sample_hash)
            # Lazy %-formatting: '%%' is rendered as a single percent sign.
            logging.info('fuzzy score for %s matches %s at %d%%',
                         path, sample_path, score)
            # Keep track of the best score seen so far.
            if score > max_score:
                max_score = score

        return max_score > 90
| 97 | |||
| 98 | |||
