From 6beeeebe3c43f0643e521139d3f8b1ff4a7f3059 Mon Sep 17 00:00:00 2001 From: Julien Voisin Date: Thu, 5 Mar 2015 15:36:22 +0100 Subject: Yara is cooler than Python --- modules/__init__.py | 0 modules/entropy.py | 56 ------------ modules/grep_count.py | 234 ------------------------------------------------- modules/levenshtein.py | 73 --------------- modules/libfuzzy.py | 98 --------------------- modules/scanmodule.py | 56 ------------ modules/whitelist.py | 46 ---------- 7 files changed, 563 deletions(-) delete mode 100644 modules/__init__.py delete mode 100644 modules/entropy.py delete mode 100644 modules/grep_count.py delete mode 100644 modules/levenshtein.py delete mode 100644 modules/libfuzzy.py delete mode 100644 modules/scanmodule.py delete mode 100644 modules/whitelist.py (limited to 'modules') diff --git a/modules/__init__.py b/modules/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/modules/entropy.py b/modules/entropy.py deleted file mode 100644 index 48b2924..0000000 --- a/modules/entropy.py +++ /dev/null @@ -1,56 +0,0 @@ -''' This module uses shannon's Entropy to detect packed malwares -''' -import os -import math -import logging -logging.basicConfig(level=logging.DEBUG) - -import scanmodule - -def main(): - return Entropy() - -class Entropy(scanmodule.ScanModule): - name = 'entropy' - def populate(self, path): - pass - def load(self, path): - pass - def save(self, path): - pass - - def __compute_score(self, path): - return (self.__entropy(path) - 5) * 100 - - def is_malware(self, path): - score = self.__compute_score(path) - logging.info('Entropy score for ' + path + ' : ' + str(score)) - return score > 75 - - def evaluate(self, path): - ''' Computes an arbitraty score for the given path - @ret A sorted list of the form [name, match_in_percent_superior_to_zero] - ''' - score = self.__compute_score(path) - if score > 0: - return [['MALWARE', score],] - return None - - def __entropy(self, path): - ''' Computes shannon's entropy for the given file - @param path Path to the file - ''' - # Computes the frequency of each byte in the file - fsize = max(float(os.path.getsize(path)), 1.0) - - freq = [0] * 256 - with open(path, 'rb') as f: - for c in f.read(): - freq[ord(c)] += 1 - - entropy = 0.0 - for f in freq: - if f: - f /= fsize - entropy += f * math.log(f, 2) - return -entropy diff --git a/modules/grep_count.py b/modules/grep_count.py deleted file mode 100644 index 2431960..0000000 --- a/modules/grep_count.py +++ /dev/null @@ -1,234 +0,0 @@ -''' This module count the occurences of dodgy terms present in a file -''' -import os -import logging -logging.basicConfig(level=logging.DEBUG) - -import scanmodule - -def main(): - return GrepCount() - -class GrepCount(scanmodule.ScanModule): - name = 'grep count' - - # ranked from 1 to 10, 10 being EVIL - # Also, 100 is awarded to MEGA-DUH-OBVIOUS things. - dodgy_terms = { - '$GLOBALS': 6, - 'WWW-Authenticate': 7, - 'ZipArchive': 6, - 'apache_get_modules': 8, - 'assert': 5, - 'base64_decode': 7, - 'bzdecompress': 7, - 'chmod': 8, - 'curl_init("file://': 100, # safe mode bypass exploit - #'dl': 10, - #'exec': 10, - 'eval(': 10, - 'eval(base64_decode': 100, - 'eval($_GET': 100, - 'eval($_POST': 100, - 'eval($_REQUEST': 100, - 'eval(base64_decode': 100, - 'eval(gzinflate': 100, - 'file_get_contents': 6, - 'fpassthru': 100, - 'fsockopen': 7, - 'ftp_connect': 7, - 'ftp_exec': 7, - 'ftp_login': 7, - 'function_exists': 4, - 'get_current_user': 10, - 'getcwd': 8, - 'getenv': 9, - 'getmxrr': 5, - 'getmygid': 10, - 'getmygid': 10, - 'getmyinode': 10, - 'getmypid': 10, - 'getmyuid': 10, - 'gzinflate': 7, - 'gzinflate(base64_decode(': 100, - 'gzuncompress': 7, - 'ini_get': 6, - 'ini_set': 6, - 'is_readable': 10, - 'mysql_get_client_info': 7, - 'open_basedir': 9, - 'passthru': 10, - 'passthru($_GET': 100, - 'passthru($_POST': 100, - 'passthru($_REQUEST': 100, - 'pclose': 9, - 'pcntl_fork': 10, - 'php_logo_guid': 10, - 'php_uname': 8, - 'phpcredits': 10, - 'phpinfo': 10, - 'phpversion': 5, - 'pnctl_exec': 10, - 'pnctl_fork': 10, - 'popen': 10, - 'posix_getegid': 10, - 'posix_geteuid': 10, - 'posix_getgetgruid': 10, - 'posix_getpwuid': 10, - 'posix_kill': 10, - 'posix_mkfifo': 10, - 'posix_setgid': 10, - 'posix_setpgid': 10, - 'posix_setsid': 10, - 'posix_setuid': 10, - 'posix_uname': 10, - 'php://input': 7, - 'proc_close': 10, - 'proc_get_status': 10, - 'proc_nice': 10, - 'proc_open': 10, - 'proc_terminate': 10, - 'putenv': 10, - 'putenv("PHP': 100, # Shellshock exploit - 'putenv(\'PHP': 100, # Shellshock exploit - 'safe_mode': 10, - 'shell_exec': 10, - 'show_source': 10, - 'socket_create(AF_INET, SOCK_STREAM, SOL_TCP)': 10, # Used for SYN flood - 'symlink': 8, - 'system(': 9, - 'system($_GET': 100, - 'system($_POST': 100, - 'system($_REQUEST': 100, - 'win_shell_execute': 10, - 'win_create_service': 100, - 'wscript': 8, - 'zend_logo_guid': 10, - 'zend_thread_id': 9, - 'zend_version': 9, - } - - dodgy_terms.update({ - '/bin/bash ': 100, - '/bin/sh ': 100, - '/etc/hosts': 100, - '/etc/passwd': 100, - '/etc/resolv.conf ': 100, - '/etc/shadow': 100, - '/etc/syslog.conf': 100, - '/proc/cpuinfo': 10, - '/tmp': 8, - '/var/cpanel/accounting.log': 100, - 'IRC server': 100, - 'LD_PRELOAD': 100, - 'PRIVMSG': 100, - 'Safe Mod Bypass': 100, - 'Shell ': 9, - '\\x': 3, # Shellcodes - '\x00/../': 100, # safe mode bypass - 'backdoor': 10, - 'bypass': 8, - 'chkrootkit': 100, - 'chmod 777': 7, - 'cmd.exe': 100, - 'dir /OG /X': 100, - 'find . -type f': 100, - 'gcc ': 8, - 'id_rsa': 100, - 'ipconfig /all': 100, - 'jschl_vc': 100, # Cloudflare bypass - 'jschl_answer': 100, # Cloudflare bypass - 'kernel32.dll': 100, - 'ls -la': 100, - 'milw0rm': 100, - 'my.cnf': 100, - 'my.conf': 100, - 'nc -l': 100, - 'netstat ': 100, - 'file:file://': 100, # basedir bypass - 'portsentry': 100, - 'proftpd.conf': 100, - 'ps -aux': 100, - 'rkhunter': 100, - 'shellcode': 100, - 'slowloris': 100, - 'snort': 100, - 'system32': 9, - 'tripwire': 100, - 'uname -a': 100, - 'wget': 8, - 'WinExec': 10, - }) - - dodgy_terms.update({ - '/cdn-cgi/l/chk_jschl': 100, # Cloudflare bypass for DDoS'ing - 'Antichat Shell': 100, - 'Cr@zy_King': 100, - 'KAdot@ngs.ru': 100, - 'Kacak': 100, - 'KingDefacer': 100, - 'SimAttacker': 100, - 'SoldiersOfAllah': 100, - 'ak74-team.net': 100, - 'alturks.com': 100, - 'egy_spider' : 100, - 'egyspider.eu' : 100, - 'exploit-db.com': 100, - 'forever5pi': 100, - 'grayhatz.org': 100, - 'kacaq.blogspot.com': 100, - 'locus7s.com': 100, - 'michaeldaw.org': 100, - 'milw0rm.com': 100, - 'pentestmonkey': 100, - 'r57.biz': 100, - 'r57shell.net': 100, - 'rootshell-team.info': 100, - 'simorgh': 100, - 'thecrowsrew.org': 100, - 'vnhacker.org': 100, - 'xdevil.org': 100, - 'zehirhacker': 100, - '~z0mbie': 100, - }) - - def populate(self, path): - ''' Does nothing :< - ''' - pass - - def evaluate(self, path): - ''' Check the given file against a list of know dodgy strings. - The calculation formulae is empirical. - @ret A sorted list of the form [name, match_in_percent_superior_to_zero] - ''' - fsize = os.path.getsize(path) - if not fsize: - return None - - content = '' - with open(path, 'r') as f: - content = f.read() - - score = 0 - for key,data in self.dodgy_terms.iteritems(): - nb = content.find(key) * data - if nb > 0: - score += nb - score /= fsize - - logging.info('Grep score for ' + path + ' : ' + str(score)) - - if score > 75: - return [['MALWARE', min(score, 100)],] - return None - - def load(self, path): - pass - - def save(self, path): - pass - - def is_malware(self, path): - return self.evaluate(path) is not None - diff --git a/modules/levenshtein.py b/modules/levenshtein.py deleted file mode 100644 index 2e854e2..0000000 --- a/modules/levenshtein.py +++ /dev/null @@ -1,73 +0,0 @@ -''' -This modules has a super-awful complexity (something along n^4), -so I'm quite sure that you don't want to run it by default ;) - -Anyway, this modules computes the Levenshtein distance between samples of malwares -and files to check, to find similarities. -''' -import os - -import scanmodule - -def main(): - return Levenshtein() - -class Levenshtein(scanmodule.ScanModule): - name = 'levenshtein' - def populate(self, path): - ''' We can't really populate the database with Levenshtein scores, - but we can speedup the calculation by storing files lenghts - ''' - for root, _, filenames in os.walk(path): - for filename in filenames: - full_path = os.path.join(root, filename) - with open(full_path, 'r') as f: - self.samples[full_path] = [os.path.getsize(full_path), f.read().lower()] - - def evaluate(self, path): - ''' Compare the hash of the given path to every samples one. - @ret A sorted list of the form [name, match_in_percent_superior_to_zero] - ''' - file_to_test = path - file_size = os.path.getsize(file_to_test) - - lst = list() - for sample_name, sample_intel in self.samples.iteritems(): - if sample_name != file_to_test: - score = self.__levenshtein(file_to_test, sample_intel[1]) - score = score / ((file_size + sample_intel[0]) / 2.0) # mean value - if score > 25: # if the match is under 10%, we don't care - lst.append([sample_name, score * 10]) - return sorted(lst, key=lambda lst: lst[1], reverse=True) - - def __levenshtein_file(self, f, b): - ''' Computes the Levenshtein's distance between a file and a buffer - @param f1 File - @param fs2 Buffer - @return The levenshtein distance - ''' - with open(f, 'r') as of: - return self.__levenshtein(of.read().lower(), b) - - def __levenshtein(self, s1, s2): - ''' Computes the Levenshtein's distance between two strings - @param s1 First string - @param s2 Second string - @return The levenshtein distance - ''' - - if len(s1) < len(s2): # Minimize computation - s1, s2 = s2, s1 - - previous_row = range(len(s2) + 1) - for i, c1 in enumerate(s1): - current_row = [i + 1] - for j, c2 in enumerate(s2): - insertions = previous_row[j + 1] + 1 - deletions = current_row[j] + 1 - substitutions = previous_row[j] + (c1 != c2) - current_row.append(min(insertions, deletions, substitutions)) - previous_row = current_row - - return previous_row[-1] - diff --git a/modules/libfuzzy.py b/modules/libfuzzy.py deleted file mode 100644 index a0d3f15..0000000 --- a/modules/libfuzzy.py +++ /dev/null @@ -1,98 +0,0 @@ -''' Ugly-pseudo-bindings to libfuzzy (used by ssdeep) to check -if a file is similar to a given list of samples -''' - -import os -import ctypes -import pickle -import sys -import logging -logging.basicConfig(level=logging.DEBUG) - -import scanmodule - - -SPAMSUM_LENGTH = 64 -FUZZY_MAX_RESULT = SPAMSUM_LENGTH + SPAMSUM_LENGTH // 2 + 20 - -def main(): - return FuzzyMatcher() - -class FuzzyMatcher(scanmodule.ScanModule): - name = 'libfuzzy' - def __init__(self, samples_path=None, persistence_path=None): - self.__initialize_libfuzzy() - super(FuzzyMatcher, self).__init__() - - def __initialize_libfuzzy(self): - ''' Bind to libfuzzy thanks to ctypes. - This will create the "fuzzy_hash_buf" and - the "fuzzy_compare" methods - ''' - try: - fuzzy = ctypes.CDLL('libfuzzy.so') - except OSError: - print('[-] Please check that you installed libfuzzy') - sys.exit(1) - - self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf - self.__fuzzy_hash_buf.restype = ctypes.c_int - self.__fuzzy_hash_buf.argtypes = [ - ctypes.c_char_p, #buf - ctypes.c_uint32, #buf_len - ctypes.c_char_p, #result - ] - self.__fuzzy_compare = fuzzy.fuzzy_compare - self.__fuzzy_compare.restype = ctypes.c_int - self.__fuzzy_compare.argtypes = [ - ctypes.c_char_p, #sig1 - ctypes.c_char_p, #sig2 - ] - - def populate(self, path): - ''' Computes fuzzy hashes of files under the given path, - and store them in the dict self.samples with the form dict {name: fuzzy_hash} - @param path Path containing the samples - ''' - for root, _, filenames in os.walk(path): - for filename in filenames: - full_path = os.path.join(root, filename) - self.samples[full_path] = self.__hash_from_file(full_path) - - def __hash_from_file(self, path): - ''' Return the hash of the given file - @param path Path to the file to hash - @ret Fuzzy hash of the given file - ''' - with open(path, 'r') as f: - out = ctypes.create_string_buffer('\x00' * FUZZY_MAX_RESULT) - content = f.read() - self.__fuzzy_hash_buf(content, len(content), out) - return out.value - - def evaluate(self, path): - ''' Compare the hash of the given path to every samples one. - @ret A sorted list of the form [name, match_in_percent_superior_to_zero] - ''' - fuzzy_hash = self.__hash_from_file(path) - - lst = list() - for f in self.samples: - score = self.__fuzzy_compare(fuzzy_hash, self.samples[f]) - if score: - lst.append([f, score]) - return sorted(lst, key=lambda lst: lst[1], reverse=True) - - def is_malware(self, path): - max_score = 0 - fuzzy_hash = self.__hash_from_file(path) - - for f in self.samples: - score = self.__fuzzy_compare(fuzzy_hash, self.samples[f]) - if score > max_score: - score = max_score - logging.info('fuzzy score for ' + path + ' matches ' + f + ' at ' + str(score) + '%%') - - return max_score > 90 - - diff --git a/modules/scanmodule.py b/modules/scanmodule.py deleted file mode 100644 index 6ace387..0000000 --- a/modules/scanmodule.py +++ /dev/null @@ -1,56 +0,0 @@ -import ConfigParser -import pickle - - -class ScanModule(object): - def __init__(self): - self.config = ConfigParser.ConfigParser() - self.config.read('modules.conf') - - self.samples = dict() - - try: - self.populate(self.config.get(self.name, 'samples')) - except ConfigParser.NoOptionError: - pass - - try: - self.load(self.config.get(self.name, 'persistence')) - except ConfigParser.NoOptionError: - pass - - def is_disable(self): - try: - return self.config.getboolean(self.name, 'disable') - except ConfigParser.NoOptionError: - return False - - def evaluate(self, path): - ''' Return in percent, the probability that - the file is a malware - @param path File to evaluate - ''' - raise NotImplemented - - def populate(self, path): - ''' Populate the module's internal database - with data from the given path - @param path Path to the data - ''' - raise NotImplemented - - - def load(self, path): - ''' Unpickle the given path, and updates the samples dict with it. - @param path Path to the dict to unpickle - ''' - with open(path, 'r') as f: - self.samples.update(pickle.load(f)) - - def save(self, path): - ''' Save the database to the given file - @param path Path where to save the database - ''' - with open(path, 'w') as f: - pickle.dump(self.samples, f) - diff --git a/modules/whitelist.py b/modules/whitelist.py deleted file mode 100644 index 587b392..0000000 --- a/modules/whitelist.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import hashlib -import scanmodule - -def main(): - return HashWhitelist() - -class HashWhitelist(scanmodule.ScanModule): - name = 'hashwhitelist' - def evaluate(self, path): - ''' Return in percent, the probability that - the file is a malware - @param path File to evaluate - ''' - sha1 = '' - with open(path, 'r') as f: - sha1 = hashlib.sha1(f.read()).hexdigest() - - lst = list() - for f in self.samples: - if sha1 == self.samples[f]: - lst.append([f, 100]) - return sorted(lst, key=lambda lst: lst[1], reverse=True) - - def is_malware(self, path): - ''' Return False if the file is whitelisted - ''' - sha1 = '' - with open(path, 'r') as f: - sha1 = hashlib.sha1(f.read()).hexdigest() - - for f in self.samples: - if sha1 == self.samples[f]: - return False - return True - - def populate(self, path): - ''' Populate the module's internal database - with data from the given path - @param path Path to the data - ''' - for root, _, filenames in os.walk(path): - for filename in filenames: - full_path = os.path.join(root, filename) - with open(full_path, 'r') as f: - self.samples[full_path] = hashlib.sha1(f.read()).hexdigest() -- cgit v1.3