summaryrefslogtreecommitdiff
path: root/modules/libfuzzy.py
diff options
context:
space:
mode:
Diffstat (limited to 'modules/libfuzzy.py')
-rw-r--r--modules/libfuzzy.py98
1 files changed, 98 insertions, 0 deletions
diff --git a/modules/libfuzzy.py b/modules/libfuzzy.py
new file mode 100644
index 0000000..a0d3f15
--- /dev/null
+++ b/modules/libfuzzy.py
@@ -0,0 +1,98 @@
''' Ugly pseudo-bindings to libfuzzy (the library used by ssdeep) to check
whether a file is similar to any of a given list of samples
'''
4
5import os
6import ctypes
7import pickle
8import sys
9import logging
10logging.basicConfig(level=logging.DEBUG)
11
12import scanmodule
13
14
# Both names presumably mirror the constants of the same name in libfuzzy's
# fuzzy.h -- TODO confirm the values against the installed library version.
SPAMSUM_LENGTH = 64
# Size, in bytes, of the buffer handed to fuzzy_hash_buf to receive its result.
FUZZY_MAX_RESULT = SPAMSUM_LENGTH + SPAMSUM_LENGTH // 2 + 20
17
def main():
    ''' Module entry point: build the scanner object defined in this file.
    @ret A new FuzzyMatcher instance, constructed with its default arguments
    '''
    return FuzzyMatcher()
20
class FuzzyMatcher(scanmodule.ScanModule):
    ''' Scan module comparing the ssdeep fuzzy hash of a file against the
    fuzzy hashes of known samples.

    Sample hashes live in self.samples, a dict {path: fuzzy_hash}. This class
    only reads and fills that dict, it never creates it; it is presumably
    provided by scanmodule.ScanModule -- TODO confirm.
    '''
    name = 'libfuzzy'

    def __init__(self, samples_path=None, persistence_path=None):
        ''' Bind to libfuzzy, then run the ScanModule initialisation.
        @param samples_path Currently unused by this class
        @param persistence_path Currently unused by this class
        '''
        self.__initialize_libfuzzy()
        super(FuzzyMatcher, self).__init__()

    def __initialize_libfuzzy(self):
        ''' Bind to libfuzzy thanks to ctypes.
        This will create the "fuzzy_hash_buf" and
        the "fuzzy_compare" methods.
        Exits the process with status 1 if libfuzzy.so cannot be loaded.
        '''
        try:
            fuzzy = ctypes.CDLL('libfuzzy.so')
        except OSError:
            print('[-] Please check that you installed libfuzzy')
            sys.exit(1)

        # Declaring restype/argtypes lets ctypes validate and convert the
        # arguments instead of guessing at call time.
        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
        self.__fuzzy_hash_buf.restype = ctypes.c_int
        self.__fuzzy_hash_buf.argtypes = [
            ctypes.c_char_p,  # buf
            ctypes.c_uint32,  # buf_len
            ctypes.c_char_p,  # result
        ]
        self.__fuzzy_compare = fuzzy.fuzzy_compare
        self.__fuzzy_compare.restype = ctypes.c_int
        self.__fuzzy_compare.argtypes = [
            ctypes.c_char_p,  # sig1
            ctypes.c_char_p,  # sig2
        ]

    def populate(self, path):
        ''' Computes fuzzy hashes of files under the given path,
        and store them in the dict self.samples with the form {name: fuzzy_hash}
        @param path Path containing the samples (walked recursively)
        '''
        for root, _, filenames in os.walk(path):
            for filename in filenames:
                full_path = os.path.join(root, filename)
                self.samples[full_path] = self.__hash_from_file(full_path)

    def __hash_from_file(self, path):
        ''' Return the hash of the given file
        @param path Path to the file to hash
        @ret Fuzzy hash of the given file; empty if libfuzzy reported an error
        '''
        # Samples are arbitrary (often binary) files and libfuzzy expects raw
        # bytes, so the file must not go through text decoding.
        with open(path, 'rb') as f:
            content = f.read()

        # An integer size yields a zero-filled, writable buffer.
        out = ctypes.create_string_buffer(FUZZY_MAX_RESULT)
        status = self.__fuzzy_hash_buf(content, len(content), out)
        if status != 0:
            # fuzzy_hash_buf returns non-zero on error; keep going (best
            # effort) but make the failure visible.
            logging.warning('libfuzzy failed to hash %s (status %d)', path, status)
        return out.value

    def evaluate(self, path):
        ''' Compare the hash of the given path to every samples one.
        @param path Path to the file to compare against the samples
        @ret A list of [name, match_in_percent_superior_to_zero] entries,
             sorted by decreasing score
        '''
        fuzzy_hash = self.__hash_from_file(path)

        matches = []
        for sample_name, sample_hash in self.samples.items():
            score = self.__fuzzy_compare(fuzzy_hash, sample_hash)
            # fuzzy_compare returns -1 on error: only keep real matches.
            if score > 0:
                matches.append([sample_name, score])
        return sorted(matches, key=lambda match: match[1], reverse=True)

    def is_malware(self, path):
        ''' Tell whether the given file closely matches one of the samples.
        @param path Path to the file to check
        @ret True if the best match against the samples is above 90%
        '''
        max_score = 0
        fuzzy_hash = self.__hash_from_file(path)

        for sample_name, sample_hash in self.samples.items():
            score = self.__fuzzy_compare(fuzzy_hash, sample_hash)
            if score > max_score:
                # Remember the best score seen so far.
                max_score = score
                logging.info('fuzzy score for %s matches %s at %d%%',
                             path, sample_name, score)

        return max_score > 90
98