1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
''' Ugly pseudo-bindings to libfuzzy (the library used by ssdeep) to check
whether a file is similar to any sample from a given list
'''
import os
import ctypes
import pickle
import sys
import logging
# Import-time side effect: asks the logging module to set up the root logger
# at DEBUG level as soon as this module is imported.
logging.basicConfig(level=logging.DEBUG)
import scanmodule
# Base length used to derive the size of the hash output buffer.
# NOTE(review): presumably mirrors the constant of the same name in
# libfuzzy's fuzzy.h - confirm against the installed header.
SPAMSUM_LENGTH = 64

# Size used for the result buffer handed to fuzzy_hash_buf.
FUZZY_MAX_RESULT = 20 + SPAMSUM_LENGTH + (SPAMSUM_LENGTH // 2)
def main():
    ''' Create a FuzzyMatcher with its default arguments.
    @ret The newly created FuzzyMatcher instance
    '''
    matcher = FuzzyMatcher()
    return matcher
class FuzzyMatcher(scanmodule.ScanModule):
    ''' Scan module comparing files against known samples with the fuzzy
    hashes computed by libfuzzy (the library used by ssdeep).

    NOTE(review): self.samples is read and written by this class but never
    created here; presumably scanmodule.ScanModule provides it as a dict
    {name: fuzzy_hash} - confirm against the base class.
    '''
    name = 'libfuzzy'

    def __init__(self, samples_path=None, persistence_path=None):
        ''' Bind to libfuzzy, then run the ScanModule initialisation.
        @param samples_path Not used by this method.
        @param persistence_path Not used by this method.

        NOTE(review): both parameters are accepted but never forwarded to
        the base class, which is initialised without arguments - confirm
        whether that is intended.
        '''
        self.__initialize_libfuzzy()
        super(FuzzyMatcher, self).__init__()

    def __initialize_libfuzzy(self):
        ''' Bind to libfuzzy thanks to ctypes.
        This will create the "fuzzy_hash_buf" and
        the "fuzzy_compare" methods.
        Prints an error and exits the process with status 1 when the
        library cannot be loaded.
        '''
        # Deliberately a "from" import: a local "import ctypes.util" would
        # turn "ctypes" into a local name and break the lookups below.
        from ctypes.util import find_library

        try:
            fuzzy = ctypes.CDLL('libfuzzy.so')
        except OSError:
            fuzzy = None

        if fuzzy is None:
            # The unversioned name is often only shipped by development
            # packages; let the platform look for a versioned library.
            located = find_library('fuzzy')
            if located:
                try:
                    fuzzy = ctypes.CDLL(located)
                except OSError:
                    fuzzy = None

        if fuzzy is None:
            print('[-] Please check that you installed libfuzzy')
            sys.exit(1)

        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
        self.__fuzzy_hash_buf.restype = ctypes.c_int
        self.__fuzzy_hash_buf.argtypes = [
            ctypes.c_char_p,  # buf
            ctypes.c_uint32,  # buf_len
            ctypes.c_char_p,  # result
        ]

        self.__fuzzy_compare = fuzzy.fuzzy_compare
        self.__fuzzy_compare.restype = ctypes.c_int
        self.__fuzzy_compare.argtypes = [
            ctypes.c_char_p,  # sig1
            ctypes.c_char_p,  # sig2
        ]

    def populate(self, path):
        ''' Computes fuzzy hashes of files under the given path,
        and store them in the dict self.samples with the form dict {name: fuzzy_hash}
        @param path Path containing the samples
        '''
        for root, _, filenames in os.walk(path):
            for filename in filenames:
                full_path = os.path.join(root, filename)
                self.samples[full_path] = self.__hash_from_file(full_path)

    def __hash_from_file(self, path):
        ''' Return the hash of the given file
        @param path Path to the file to hash
        @ret Fuzzy hash of the given file, as a byte string (empty when
             libfuzzy did not write any hash)
        '''
        # Binary mode: samples are arbitrary files, and the ctypes
        # prototypes above expect byte strings.
        with open(path, 'rb') as f:
            content = f.read()

        # Zero-filled buffer of FUZZY_MAX_RESULT bytes plus a trailing NUL.
        out = ctypes.create_string_buffer(FUZZY_MAX_RESULT + 1)
        status = self.__fuzzy_hash_buf(content, len(content), out)
        if status != 0:
            # Best effort: report the failure and fall through with
            # whatever the buffer holds instead of aborting the scan.
            logging.warning('libfuzzy failed to hash %s (status %d)', path, status)
        return out.value

    def evaluate(self, path):
        ''' Compare the hash of the given path to every samples one.
        @param path Path to the file to compare
        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
        '''
        fuzzy_hash = self.__hash_from_file(path)
        matches = []
        for sample_name, sample_hash in self.samples.items():
            score = self.__fuzzy_compare(fuzzy_hash, sample_hash)
            # Strictly positive only: fuzzy_compare reports errors with a
            # negative value, which must not be listed as a match.
            if score > 0:
                matches.append([sample_name, score])
        return sorted(matches, key=lambda match: match[1], reverse=True)

    def is_malware(self, path):
        ''' Tell whether the given file closely matches one of the samples.
        @param path Path to the file to check
        @ret True if the best score against the samples is above 90, False otherwise
        '''
        fuzzy_hash = self.__hash_from_file(path)
        max_score = 0
        best_match = None
        for sample_name, sample_hash in self.samples.items():
            score = self.__fuzzy_compare(fuzzy_hash, sample_hash)
            if score > max_score:
                max_score = score
                best_match = sample_name

        # Only log when something matched, so an empty or non-matching
        # sample set does not produce a bogus line.
        if best_match is not None:
            logging.info('fuzzy score for %s matches %s at %d%%',
                         path, best_match, max_score)
        return max_score > 90
|