From 807248f9343a4cabb48c3be1a512b27f6377e871 Mon Sep 17 00:00:00 2001
From: Julien Voisin
Date: Tue, 3 Mar 2015 15:58:59 +0100
Subject: First commit!

---
 modules/__init__.py    |   0
 modules/entropy.py     |  56 ++++++++++++
 modules/grep_count.py  | 234 +++++++++++++++++++++++++++++++++++++++++++++++++
 modules/levenshtein.py |  73 +++++++++++++++
 modules/libfuzzy.py    |  98 +++++++++++++++++++++
 modules/scanmodule.py  |  56 ++++++++++++
 modules/whitelist.py   |  46 ++++++++++
 7 files changed, 563 insertions(+)
 create mode 100644 modules/__init__.py
 create mode 100644 modules/entropy.py
 create mode 100644 modules/grep_count.py
 create mode 100644 modules/levenshtein.py
 create mode 100644 modules/libfuzzy.py
 create mode 100644 modules/scanmodule.py
 create mode 100644 modules/whitelist.py

(limited to 'modules')

diff --git a/modules/__init__.py b/modules/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/modules/entropy.py b/modules/entropy.py
new file mode 100644
index 0000000..48b2924
--- /dev/null
+++ b/modules/entropy.py
@@ -0,0 +1,56 @@
+''' This module uses shannon's Entropy to detect packed malwares
+'''
+import os
+import math
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+import scanmodule
+
+def main():
+    return Entropy()
+
+class Entropy(scanmodule.ScanModule):
+    name = 'entropy'
+    def populate(self, path):
+        pass
+    def load(self, path):
+        pass
+    def save(self, path):
+        pass
+
+    def __compute_score(self, path):
+        return (self.__entropy(path) - 5) * 100
+
+    def is_malware(self, path):
+        score = self.__compute_score(path)
+        logging.info('Entropy score for ' + path + ' : ' + str(score))
+        return score > 75
+
+    def evaluate(self, path):
+        ''' Computes an arbitrary score for the given path
+        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
+        '''
+        score = self.__compute_score(path)
+        if score > 0:
+            return [['MALWARE', score],]
+        return None
+
+    def __entropy(self, path):
+        ''' Computes shannon's entropy for the given file
+        @param path Path to the file
+        '''
+        # Computes the frequency of each byte in the file
+        fsize = max(float(os.path.getsize(path)), 1.0)
+
+        freq = [0] * 256
+        with open(path, 'rb') as f:
+            for c in f.read():
+                freq[ord(c)] += 1
+
+        entropy = 0.0
+        for f in freq:
+            if f:
+                f /= fsize
+                entropy += f * math.log(f, 2)
+        return -entropy
diff --git a/modules/grep_count.py b/modules/grep_count.py
new file mode 100644
index 0000000..2431960
--- /dev/null
+++ b/modules/grep_count.py
@@ -0,0 +1,234 @@
+''' This module counts the occurrences of dodgy terms present in a file
+'''
+import os
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+import scanmodule
+
+def main():
+    return GrepCount()
+
+class GrepCount(scanmodule.ScanModule):
+    name = 'grep count'
+
+    # ranked from 1 to 10, 10 being EVIL
+    # Also, 100 is awarded to MEGA-DUH-OBVIOUS things.
+    dodgy_terms = {
+        '$GLOBALS': 6,
+        'WWW-Authenticate': 7,
+        'ZipArchive': 6,
+        'apache_get_modules': 8,
+        'assert': 5,
+        'base64_decode': 7,
+        'bzdecompress': 7,
+        'chmod': 8,
+        'curl_init("file://': 100, # safe mode bypass exploit
+        #'dl': 10,
+        #'exec': 10,
+        'eval(': 10,
+        'eval(base64_decode': 100,
+        'eval($_GET': 100,
+        'eval($_POST': 100,
+        'eval($_REQUEST': 100,
+        'eval(base64_decode': 100,
+        'eval(gzinflate': 100,
+        'file_get_contents': 6,
+        'fpassthru': 100,
+        'fsockopen': 7,
+        'ftp_connect': 7,
+        'ftp_exec': 7,
+        'ftp_login': 7,
+        'function_exists': 4,
+        'get_current_user': 10,
+        'getcwd': 8,
+        'getenv': 9,
+        'getmxrr': 5,
+        'getmygid': 10,
+        'getmygid': 10,
+        'getmyinode': 10,
+        'getmypid': 10,
+        'getmyuid': 10,
+        'gzinflate': 7,
+        'gzinflate(base64_decode(': 100,
+        'gzuncompress': 7,
+        'ini_get': 6,
+        'ini_set': 6,
+        'is_readable': 10,
+        'mysql_get_client_info': 7,
+        'open_basedir': 9,
+        'passthru': 10,
+        'passthru($_GET': 100,
+        'passthru($_POST': 100,
+        'passthru($_REQUEST': 100,
+        'pclose': 9,
+        'pcntl_fork': 10,
+        'php_logo_guid': 10,
+        'php_uname': 8,
+        'phpcredits': 10,
+        'phpinfo': 10,
+        'phpversion': 5,
+        'pnctl_exec': 10,
+        'pnctl_fork': 10,
+        'popen': 10,
+        'posix_getegid': 10,
+        'posix_geteuid': 10,
+        'posix_getgetgruid': 10,
+        'posix_getpwuid': 10,
+        'posix_kill': 10,
+        'posix_mkfifo': 10,
+        'posix_setgid': 10,
+        'posix_setpgid': 10,
+        'posix_setsid': 10,
+        'posix_setuid': 10,
+        'posix_uname': 10,
+        'php://input': 7,
+        'proc_close': 10,
+        'proc_get_status': 10,
+        'proc_nice': 10,
+        'proc_open': 10,
+        'proc_terminate': 10,
+        'putenv': 10,
+        'putenv("PHP': 100, # Shellshock exploit
+        'putenv(\'PHP': 100, # Shellshock exploit
+        'safe_mode': 10,
+        'shell_exec': 10,
+        'show_source': 10,
+        'socket_create(AF_INET, SOCK_STREAM, SOL_TCP)': 10, # Used for SYN flood
+        'symlink': 8,
+        'system(': 9,
+        'system($_GET': 100,
+        'system($_POST': 100,
+        'system($_REQUEST': 100,
+        'win_shell_execute': 10,
+        'win_create_service': 100,
+        'wscript': 8,
+        'zend_logo_guid': 10,
+        'zend_thread_id': 9,
+        'zend_version': 9,
+    }
+
+    dodgy_terms.update({
+        '/bin/bash ': 100,
+        '/bin/sh ': 100,
+        '/etc/hosts': 100,
+        '/etc/passwd': 100,
+        '/etc/resolv.conf ': 100,
+        '/etc/shadow': 100,
+        '/etc/syslog.conf': 100,
+        '/proc/cpuinfo': 10,
+        '/tmp': 8,
+        '/var/cpanel/accounting.log': 100,
+        'IRC server': 100,
+        'LD_PRELOAD': 100,
+        'PRIVMSG': 100,
+        'Safe Mod Bypass': 100,
+        'Shell ': 9,
+        '\\x': 3, # Shellcodes
+        '\x00/../': 100, # safe mode bypass
+        'backdoor': 10,
+        'bypass': 8,
+        'chkrootkit': 100,
+        'chmod 777': 7,
+        'cmd.exe': 100,
+        'dir /OG /X': 100,
+        'find . -type f': 100,
+        'gcc ': 8,
+        'id_rsa': 100,
+        'ipconfig /all': 100,
+        'jschl_vc': 100, # Cloudflare bypass
+        'jschl_answer': 100, # Cloudflare bypass
+        'kernel32.dll': 100,
+        'ls -la': 100,
+        'milw0rm': 100,
+        'my.cnf': 100,
+        'my.conf': 100,
+        'nc -l': 100,
+        'netstat ': 100,
+        'file:file://': 100, # basedir bypass
+        'portsentry': 100,
+        'proftpd.conf': 100,
+        'ps -aux': 100,
+        'rkhunter': 100,
+        'shellcode': 100,
+        'slowloris': 100,
+        'snort': 100,
+        'system32': 9,
+        'tripwire': 100,
+        'uname -a': 100,
+        'wget': 8,
+        'WinExec': 10,
+    })
+
+    dodgy_terms.update({
+        '/cdn-cgi/l/chk_jschl': 100, # Cloudflare bypass for DDoS'ing
+        'Antichat Shell': 100,
+        'Cr@zy_King': 100,
+        'KAdot@ngs.ru': 100,
+        'Kacak': 100,
+        'KingDefacer': 100,
+        'SimAttacker': 100,
+        'SoldiersOfAllah': 100,
+        'ak74-team.net': 100,
+        'alturks.com': 100,
+        'egy_spider' : 100,
+        'egyspider.eu' : 100,
+        'exploit-db.com': 100,
+        'forever5pi': 100,
+        'grayhatz.org': 100,
+        'kacaq.blogspot.com': 100,
+        'locus7s.com': 100,
+        'michaeldaw.org': 100,
+        'milw0rm.com': 100,
+        'pentestmonkey': 100,
+        'r57.biz': 100,
+        'r57shell.net': 100,
+        'rootshell-team.info': 100,
+        'simorgh': 100,
+        'thecrowsrew.org': 100,
+        'vnhacker.org': 100,
+        'xdevil.org': 100,
+        'zehirhacker': 100,
+        '~z0mbie': 100,
+    })
+
+    def populate(self, path):
+        ''' Does nothing :<
+        '''
+        pass
+
+    def evaluate(self, path):
+        ''' Check the given file against a list of known dodgy strings.
+        The calculation formula is empirical.
+        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
+        '''
+        fsize = os.path.getsize(path)
+        if not fsize:
+            return None
+
+        content = ''
+        with open(path, 'r') as f:
+            content = f.read()
+
+        score = 0
+        for key,data in self.dodgy_terms.iteritems():
+            nb = content.find(key) * data
+            if nb > 0:
+                score += nb
+        score /= fsize
+
+        logging.info('Grep score for ' + path + ' : ' + str(score))
+
+        if score > 75:
+            return [['MALWARE', min(score, 100)],]
+        return None
+
+    def load(self, path):
+        pass
+
+    def save(self, path):
+        pass
+
+    def is_malware(self, path):
+        return self.evaluate(path) is not None
+
diff --git a/modules/levenshtein.py b/modules/levenshtein.py
new file mode 100644
index 0000000..2e854e2
--- /dev/null
+++ b/modules/levenshtein.py
@@ -0,0 +1,73 @@
+'''
+This module has a super-awful complexity (something along n^4),
+so I'm quite sure that you don't want to run it by default ;)
+
+Anyway, this module computes the Levenshtein distance between samples of malwares
+and files to check, to find similarities.
+'''
+import os
+
+import scanmodule
+
+def main():
+    return Levenshtein()
+
+class Levenshtein(scanmodule.ScanModule):
+    name = 'levenshtein'
+    def populate(self, path):
+        ''' We can't really populate the database with Levenshtein scores,
+        but we can speedup the calculation by storing file lengths
+        '''
+        for root, _, filenames in os.walk(path):
+            for filename in filenames:
+                full_path = os.path.join(root, filename)
+                with open(full_path, 'r') as f:
+                    self.samples[full_path] = [os.path.getsize(full_path), f.read().lower()]
+
+    def evaluate(self, path):
+        ''' Compare the content of the given path to the content of every sample.
+        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
+        '''
+        file_to_test = path
+        file_size = os.path.getsize(file_to_test)
+
+        lst = list()
+        for sample_name, sample_intel in self.samples.iteritems():
+            if sample_name != file_to_test:
+                score = self.__levenshtein_file(file_to_test, sample_intel[1])
+                score = score / ((file_size + sample_intel[0]) / 2.0) # mean value
+                if score > 25: # if the match is under 10%, we don't care
+                    lst.append([sample_name, score * 10])
+        return sorted(lst, key=lambda lst: lst[1], reverse=True)
+
+    def __levenshtein_file(self, f, b):
+        ''' Computes the Levenshtein's distance between a file and a buffer
+        @param f Path to the file
+        @param b Buffer
+        @return The levenshtein distance
+        '''
+        with open(f, 'r') as of:
+            return self.__levenshtein(of.read().lower(), b)
+
+    def __levenshtein(self, s1, s2):
+        ''' Computes the Levenshtein's distance between two strings
+        @param s1 First string
+        @param s2 Second string
+        @return The levenshtein distance
+        '''
+
+        if len(s1) < len(s2): # Minimize computation
+            s1, s2 = s2, s1
+
+        previous_row = range(len(s2) + 1)
+        for i, c1 in enumerate(s1):
+            current_row = [i + 1]
+            for j, c2 in enumerate(s2):
+                insertions = previous_row[j + 1] + 1
+                deletions = current_row[j] + 1
+                substitutions = previous_row[j] + (c1 != c2)
+                current_row.append(min(insertions, deletions, substitutions))
+            previous_row = current_row
+
+        return previous_row[-1]
+
diff --git a/modules/libfuzzy.py b/modules/libfuzzy.py
new file mode 100644
index 0000000..a0d3f15
--- /dev/null
+++ b/modules/libfuzzy.py
@@ -0,0 +1,98 @@
+''' Ugly-pseudo-bindings to libfuzzy (used by ssdeep) to check
+if a file is similar to a given list of samples
+'''
+
+import os
+import ctypes
+import pickle
+import sys
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+import scanmodule
+
+
+SPAMSUM_LENGTH = 64
+FUZZY_MAX_RESULT = SPAMSUM_LENGTH + SPAMSUM_LENGTH // 2 + 20
+
+def main():
+    return FuzzyMatcher()
+
+class FuzzyMatcher(scanmodule.ScanModule):
+    name = 'libfuzzy'
+    def __init__(self, samples_path=None, persistence_path=None):
+        self.__initialize_libfuzzy()
+        super(FuzzyMatcher, self).__init__()
+
+    def __initialize_libfuzzy(self):
+        ''' Bind to libfuzzy thanks to ctypes.
+        This will create the "fuzzy_hash_buf" and
+        the "fuzzy_compare" methods
+        '''
+        try:
+            fuzzy = ctypes.CDLL('libfuzzy.so')
+        except OSError:
+            print('[-] Please check that you installed libfuzzy')
+            sys.exit(1)
+
+        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
+        self.__fuzzy_hash_buf.restype = ctypes.c_int
+        self.__fuzzy_hash_buf.argtypes = [
+            ctypes.c_char_p, #buf
+            ctypes.c_uint32, #buf_len
+            ctypes.c_char_p, #result
+        ]
+        self.__fuzzy_compare = fuzzy.fuzzy_compare
+        self.__fuzzy_compare.restype = ctypes.c_int
+        self.__fuzzy_compare.argtypes = [
+            ctypes.c_char_p, #sig1
+            ctypes.c_char_p, #sig2
+        ]
+
+    def populate(self, path):
+        ''' Computes fuzzy hashes of files under the given path,
+        and store them in the dict self.samples with the form dict {name: fuzzy_hash}
+        @param path Path containing the samples
+        '''
+        for root, _, filenames in os.walk(path):
+            for filename in filenames:
+                full_path = os.path.join(root, filename)
+                self.samples[full_path] = self.__hash_from_file(full_path)
+
+    def __hash_from_file(self, path):
+        ''' Return the hash of the given file
+        @param path Path to the file to hash
+        @ret Fuzzy hash of the given file
+        '''
+        with open(path, 'r') as f:
+            out = ctypes.create_string_buffer('\x00' * FUZZY_MAX_RESULT)
+            content = f.read()
+            self.__fuzzy_hash_buf(content, len(content), out)
+            return out.value
+
+    def evaluate(self, path):
+        ''' Compare the hash of the given path to every samples one.
+        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
+        '''
+        fuzzy_hash = self.__hash_from_file(path)
+
+        lst = list()
+        for f in self.samples:
+            score = self.__fuzzy_compare(fuzzy_hash, self.samples[f])
+            if score:
+                lst.append([f, score])
+        return sorted(lst, key=lambda lst: lst[1], reverse=True)
+
+    def is_malware(self, path):
+        max_score = 0
+        fuzzy_hash = self.__hash_from_file(path)
+
+        for f in self.samples:
+            score = self.__fuzzy_compare(fuzzy_hash, self.samples[f])
+            if score > max_score:
+                max_score = score
+                logging.info('fuzzy score for ' + path + ' matches ' + f + ' at ' + str(score) + '%')
+
+        return max_score > 90
+
+
diff --git a/modules/scanmodule.py b/modules/scanmodule.py
new file mode 100644
index 0000000..6ace387
--- /dev/null
+++ b/modules/scanmodule.py
@@ -0,0 +1,56 @@
+import ConfigParser
+import pickle
+
+
+class ScanModule(object):
+    def __init__(self):
+        self.config = ConfigParser.ConfigParser()
+        self.config.read('modules.conf')
+
+        self.samples = dict()
+
+        try:
+            self.populate(self.config.get(self.name, 'samples'))
+        except ConfigParser.NoOptionError:
+            pass
+
+        try:
+            self.load(self.config.get(self.name, 'persistence'))
+        except ConfigParser.NoOptionError:
+            pass
+
+    def is_disable(self):
+        try:
+            return self.config.getboolean(self.name, 'disable')
+        except ConfigParser.NoOptionError:
+            return False
+
+    def evaluate(self, path):
+        ''' Return in percent, the probability that
+        the file is a malware
+        @param path File to evaluate
+        '''
+        raise NotImplementedError
+
+    def populate(self, path):
+        ''' Populate the module's internal database
+        with data from the given path
+        @param path Path to the data
+        '''
+        raise NotImplementedError
+
+
+    def load(self, path):
+        ''' Unpickle the given path, and updates the samples dict with it.
+        @param path Path to the dict to unpickle
+        '''
+        with open(path, 'r') as f:
+            self.samples.update(pickle.load(f))
+
+    def save(self, path):
+        ''' Save the database to the given file
+        @param path Path where to save the database
+        '''
+        with open(path, 'w') as f:
+            pickle.dump(self.samples, f)
+
diff --git a/modules/whitelist.py b/modules/whitelist.py
new file mode 100644
index 0000000..587b392
--- /dev/null
+++ b/modules/whitelist.py
@@ -0,0 +1,46 @@
+import os
+import hashlib
+import scanmodule
+
+def main():
+    return HashWhitelist()
+
+class HashWhitelist(scanmodule.ScanModule):
+    name = 'hashwhitelist'
+    def evaluate(self, path):
+        ''' Return in percent, the probability that
+        the file is a malware
+        @param path File to evaluate
+        '''
+        sha1 = ''
+        with open(path, 'r') as f:
+            sha1 = hashlib.sha1(f.read()).hexdigest()
+
+        lst = list()
+        for f in self.samples:
+            if sha1 == self.samples[f]:
+                lst.append([f, 100])
+        return sorted(lst, key=lambda lst: lst[1], reverse=True)
+
+    def is_malware(self, path):
+        ''' Return False if the file is whitelisted
+        '''
+        sha1 = ''
+        with open(path, 'r') as f:
+            sha1 = hashlib.sha1(f.read()).hexdigest()
+
+        for f in self.samples:
+            if sha1 == self.samples[f]:
+                return False
+        return True
+
+    def populate(self, path):
+        ''' Populate the module's internal database
+        with data from the given path
+        @param path Path to the data
+        '''
+        for root, _, filenames in os.walk(path):
+            for filename in filenames:
+                full_path = os.path.join(root, filename)
+                with open(full_path, 'r') as f:
+                    self.samples[full_path] = hashlib.sha1(f.read()).hexdigest()
-- 
cgit v1.3