From 807248f9343a4cabb48c3be1a512b27f6377e871 Mon Sep 17 00:00:00 2001
From: Julien Voisin
Date: Tue, 3 Mar 2015 15:58:59 +0100
Subject: First commit!

---
 modules/__init__.py    |   0
 modules/entropy.py     |  56 ++++++++++++
 modules/grep_count.py  | 234 +++++++++++++++++++++++++++++++++++++++++++++++++
 modules/levenshtein.py |  73 +++++++++++++++
 modules/libfuzzy.py    |  98 +++++++++++++++++++++
 modules/scanmodule.py  |  56 ++++++++++++
 modules/whitelist.py   |  46 ++++++++++
 7 files changed, 563 insertions(+)
 create mode 100644 modules/__init__.py
 create mode 100644 modules/entropy.py
 create mode 100644 modules/grep_count.py
 create mode 100644 modules/levenshtein.py
 create mode 100644 modules/libfuzzy.py
 create mode 100644 modules/scanmodule.py
 create mode 100644 modules/whitelist.py

(limited to 'modules')

diff --git a/modules/__init__.py b/modules/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/modules/entropy.py b/modules/entropy.py
new file mode 100644
index 0000000..48b2924
--- /dev/null
+++ b/modules/entropy.py
@@ -0,0 +1,56 @@
+''' This module uses Shannon's entropy to detect packed malware
+'''
+import os
+import math
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+import scanmodule
+
+def main():  # builds and returns a new Entropy scan module
+    return Entropy()
+
+class Entropy(scanmodule.ScanModule):
+    name = 'entropy'  # section name ScanModule looks up in modules.conf
+    def populate(self, path):
+        pass  # no sample database for this module
+    def load(self, path):
+        pass  # nothing is persisted
+    def save(self, path):
+        pass  # nothing is persisted
+
+    def __compute_score(self, path):
+        # An entropy of 5 gives a score of 0; each extra unit adds 100
+        return 100 * (self.__entropy(path) - 5)
+
+    def is_malware(self, path):
+        ''' Tells whether the entropy score of the file exceeds 75 '''
+        value = self.__compute_score(path)
+        logging.info('Entropy score for ' + path + ' : ' + str(value))
+        return value > 75
+
+    def evaluate(self, path):
+        ''' Computes an arbitrary, entropy-based score for the given path
+        @ret [['MALWARE', score]] if the score is positive, None otherwise
+        '''
+        value = self.__compute_score(path)
+        return [['MALWARE', value],] if value > 0 else None
+
+    def __entropy(self, path):
+        ''' Computes Shannon's entropy (log base 2) of the file's bytes
+        @param path Path to the file
+        '''
+        # Size reported by the filesystem, as a float clamped to >= 1.0
+        total = max(float(os.path.getsize(path)), 1.0)
+        # Number of occurrences of each possible byte value
+        counts = [0] * 256
+        with open(path, 'rb') as handle:
+            for byte in handle.read():
+                counts[ord(byte)] += 1
+
+        acc = 0.0
+        for count in counts:
+            if count:
+                ratio = count / total
+                acc += ratio * math.log(ratio, 2)
+        return -acc
diff --git a/modules/grep_count.py b/modules/grep_count.py
new file mode 100644
index 0000000..2431960
--- /dev/null
+++ b/modules/grep_count.py
@@ -0,0 +1,234 @@
+''' This module counts the occurrences of dodgy terms present in a file
+'''
+import os
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+import scanmodule
+
+def main():  # builds and returns a new GrepCount scan module
+    return GrepCount()
+
+class GrepCount(scanmodule.ScanModule):
+    ''' Scores a file from the dodgy strings it contains: each term of
+    dodgy_terms carries a weight that evaluate() folds into the score.
+    '''
+    name = 'grep count'
+
+    # ranked from 1 to 10, 10 being EVIL
+    # Also, 100 is awarded to MEGA-DUH-OBVIOUS things.
+    dodgy_terms = {
+            '$GLOBALS': 6,
+            'WWW-Authenticate': 7,
+            'ZipArchive': 6,
+            'apache_get_modules': 8,
+            'assert': 5,
+            'base64_decode': 7,
+            'bzdecompress': 7,
+            'chmod': 8,
+            'curl_init("file://': 100,  # safe mode bypass exploit
+            #'dl': 10,
+            #'exec': 10,
+            'eval(': 10,
+            'eval(base64_decode': 100,
+            'eval($_GET': 100,
+            'eval($_POST': 100,
+            'eval($_REQUEST': 100,
+            'eval(gzinflate': 100,
+            'file_get_contents': 6,
+            'fpassthru': 100,
+            'fsockopen': 7,
+            'ftp_connect': 7,
+            'ftp_exec': 7,
+            'ftp_login': 7,
+            'function_exists': 4,
+            'get_current_user': 10,
+            'getcwd': 8,
+            'getenv': 9,
+            'getmxrr': 5,
+            'getmygid': 10,
+            'getmyinode': 10,
+            'getmypid': 10,
+            'getmyuid': 10,
+            'gzinflate': 7,
+            'gzinflate(base64_decode(': 100,
+            'gzuncompress': 7,
+            'ini_get': 6,
+            'ini_set': 6,
+            'is_readable': 10,
+            'mysql_get_client_info': 7,
+            'open_basedir': 9,
+            'passthru': 10,
+            'passthru($_GET': 100,
+            'passthru($_POST': 100,
+            'passthru($_REQUEST': 100,
+            'pclose': 9,
+            'pcntl_exec': 10,
+            'pcntl_fork': 10,
+            'php_logo_guid': 10,
+            'php_uname': 8,
+            'phpcredits': 10,
+            'phpinfo': 10,
+            'phpversion': 5,
+            'popen': 10,
+            'posix_getegid': 10,
+            'posix_geteuid': 10,
+            'posix_getgetgruid': 10,  # NOTE(review): typo for posix_getgrgid? confirm
+            'posix_getpwuid': 10,
+            'posix_kill': 10,
+            'posix_mkfifo': 10,
+            'posix_setgid': 10,
+            'posix_setpgid': 10,
+            'posix_setsid': 10,
+            'posix_setuid': 10,
+            'posix_uname': 10,
+            'php://input': 7,
+            'proc_close': 10,
+            'proc_get_status': 10,
+            'proc_nice': 10,
+            'proc_open': 10,
+            'proc_terminate': 10,
+            'putenv': 10,
+            'putenv("PHP': 100,  # Shellshock exploit
+            'putenv(\'PHP': 100,  # Shellshock exploit
+            'safe_mode': 10,
+            'shell_exec': 10,
+            'show_source': 10,
+            'socket_create(AF_INET, SOCK_STREAM, SOL_TCP)': 10,  # Used for SYN flood
+            'symlink': 8,
+            'system(': 9,
+            'system($_GET': 100,
+            'system($_POST': 100,
+            'system($_REQUEST': 100,
+            'win_shell_execute': 10,
+            'win_create_service': 100,
+            'wscript': 8,
+            'zend_logo_guid': 10,
+            'zend_thread_id': 9,
+            'zend_version': 9,
+            }
+
+    dodgy_terms.update({
+            '/bin/bash ': 100,
+            '/bin/sh ': 100,
+            '/etc/hosts': 100,
+            '/etc/passwd': 100,
+            '/etc/resolv.conf ': 100,
+            '/etc/shadow': 100,
+            '/etc/syslog.conf': 100,
+            '/proc/cpuinfo': 10,
+            '/tmp': 8,
+            '/var/cpanel/accounting.log': 100,
+            'IRC server': 100,
+            'LD_PRELOAD': 100,
+            'PRIVMSG': 100,
+            'Safe Mod Bypass': 100,
+            'Shell ': 9,
+            '\\x': 3,  # Shellcodes
+            '\x00/../': 100,  # safe mode bypass
+            'backdoor': 10,
+            'bypass': 8,
+            'chkrootkit': 100,
+            'chmod 777': 7,
+            'cmd.exe': 100,
+            'dir /OG /X': 100,
+            'find . -type f': 100,
+            'gcc ': 8,
+            'id_rsa': 100,
+            'ipconfig /all': 100,
+            'jschl_vc': 100,  # Cloudflare bypass
+            'jschl_answer': 100,  # Cloudflare bypass
+            'kernel32.dll': 100,
+            'ls -la': 100,
+            'milw0rm': 100,
+            'my.cnf': 100,
+            'my.conf': 100,
+            'nc -l': 100,
+            'netstat ': 100,
+            'file:file://': 100,  # basedir bypass
+            'portsentry': 100,
+            'proftpd.conf': 100,
+            'ps -aux': 100,
+            'rkhunter': 100,
+            'shellcode': 100,
+            'slowloris': 100,
+            'snort': 100,
+            'system32': 9,
+            'tripwire': 100,
+            'uname -a': 100,
+            'wget': 8,
+            'WinExec': 10,
+            })
+
+    dodgy_terms.update({
+        '/cdn-cgi/l/chk_jschl': 100,  # Cloudflare bypass for DDoS'ing
+        'Antichat Shell': 100,
+        'Cr@zy_King': 100,
+        'KAdot@ngs.ru': 100,
+        'Kacak': 100,
+        'KingDefacer': 100,
+        'SimAttacker': 100,
+        'SoldiersOfAllah': 100,
+        'ak74-team.net': 100,
+        'alturks.com': 100,
+        'egy_spider': 100,
+        'egyspider.eu': 100,
+        'exploit-db.com': 100,
+        'forever5pi': 100,
+        'grayhatz.org': 100,
+        'kacaq.blogspot.com': 100,
+        'locus7s.com': 100,
+        'michaeldaw.org': 100,
+        'milw0rm.com': 100,
+        'pentestmonkey': 100,
+        'r57.biz': 100,
+        'r57shell.net': 100,
+        'rootshell-team.info': 100,
+        'simorgh': 100,
+        'thecrowsrew.org': 100,
+        'vnhacker.org': 100,
+        'xdevil.org': 100,
+        'zehirhacker': 100,
+        '~z0mbie': 100,
+        })
+
+    def populate(self, path):
+        ''' Does nothing :<
+        '''
+        pass
+
+    def evaluate(self, path):
+        ''' Check the given file against a list of known dodgy strings.
+        The calculation formula is empirical.
+        @ret [['MALWARE', score]] (score capped at 100) if score > 75, else None
+        '''
+        fsize = os.path.getsize(path)
+        if not fsize:
+            return None
+
+        with open(path, 'r') as f:
+            content = f.read()
+
+        # NOTE(review): find() yields the offset of the first match, not an
+        # occurrence count, so each term is weighted by where it first shows
+        # up (a term at offset 0 adds nothing). Confirm before changing.
+        score = 0
+        for key, data in self.dodgy_terms.iteritems():
+            nb = content.find(key) * data
+            if nb > 0:
+                score += nb
+        score /= fsize
+
+        logging.info('Grep score for ' + path + ' : ' + str(score))
+        if score > 75:
+            return [['MALWARE', min(score, 100)],]
+        return None
+
+    def load(self, path):
+        pass
+
+    def save(self, path):
+        pass
+
+    def is_malware(self, path):
+        return self.evaluate(path) is not None
+
diff --git a/modules/levenshtein.py b/modules/levenshtein.py
new file mode 100644
index 0000000..2e854e2
--- /dev/null
+++ b/modules/levenshtein.py
@@ -0,0 +1,73 @@
+'''
+This module has a super-awful complexity (something along n^4),
+so I'm quite sure that you don't want to run it by default ;)
+
+Anyway, this module computes the Levenshtein distance between malware samples
+and files to check, to find similarities.
+'''
+import os
+
+import scanmodule
+
+def main():  # builds and returns a new Levenshtein scan module
+    return Levenshtein()
+
+class Levenshtein(scanmodule.ScanModule):
+    name = 'levenshtein'
+    def populate(self, path):
+        ''' We can't really populate the database with Levenshtein scores,
+        but we can speed up the calculation by storing file lengths
+        '''
+        for root, _, filenames in os.walk(path):
+            for filename in filenames:
+                full_path = os.path.join(root, filename)
+                with open(full_path, 'r') as f:
+                    self.samples[full_path] = [os.path.getsize(full_path), f.read().lower()]
+
+    def evaluate(self, path):
+        ''' Compare the content of the given path to every sample's one.
+        @ret A list of [sample_name, score], sorted by decreasing score
+        '''
+        file_size = os.path.getsize(path)
+        with open(path, 'r') as f:
+            content = f.read().lower()  # samples are stored lowercased too
+        lst = list()
+        for sample_name, sample_intel in self.samples.iteritems():
+            if sample_name != path:
+                score = self.__levenshtein(content, sample_intel[1])
+                score = score / ((file_size + sample_intel[0]) / 2.0)  # mean value
+                # NOTE(review): a distance divided by the mean size should stay
+                # <= 2, so this threshold looks unreachable - confirm scoring.
+                if score > 25:
+                    lst.append([sample_name, score * 10])
+        return sorted(lst, key=lambda item: item[1], reverse=True)
+
+    def __levenshtein_file(self, f, b):
+        ''' Computes the Levenshtein's distance between a file and a buffer
+        @param f Path of the file; its content is lowercased before comparing
+        @param b Buffer
+        @return The levenshtein distance
+        '''
+        with open(f, 'r') as of:
+            return self.__levenshtein(of.read().lower(), b)
+
+    def __levenshtein(self, s1, s2):
+        ''' Computes the Levenshtein's distance between two strings
+        @param s1 First string
+        @param s2 Second string
+        @return The levenshtein distance
+        '''
+        if len(s1) < len(s2):  # Minimize computation
+            s1, s2 = s2, s1
+
+        previous_row = range(len(s2) + 1)
+        for i, c1 in enumerate(s1):
+            current_row = [i + 1]
+            for j, c2 in enumerate(s2):
+                insertions = previous_row[j + 1] + 1
+                deletions = current_row[j] + 1
+                substitutions = previous_row[j] + (c1 != c2)
+                current_row.append(min(insertions, deletions, substitutions))
+            previous_row = current_row
+        return previous_row[-1]
+
diff --git a/modules/libfuzzy.py b/modules/libfuzzy.py
new file mode 100644
index 0000000..a0d3f15
--- /dev/null
+++ b/modules/libfuzzy.py
@@ -0,0 +1,98 @@
+''' Ugly-pseudo-bindings to libfuzzy (used by ssdeep) to check
+if a file is similar to a given list of samples
+'''
+
+import os
+import ctypes
+import pickle
+import sys
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+import scanmodule
+
+
+SPAMSUM_LENGTH = 64
+FUZZY_MAX_RESULT = SPAMSUM_LENGTH + SPAMSUM_LENGTH // 2 + 20
+
+def main():  # builds and returns a new FuzzyMatcher scan module
+    return FuzzyMatcher()
+
+class FuzzyMatcher(scanmodule.ScanModule):
+    name = 'libfuzzy'
+    def __init__(self, samples_path=None, persistence_path=None):
+        self.__initialize_libfuzzy()  # must precede populate(), run by super
+        super(FuzzyMatcher, self).__init__()
+
+    def __initialize_libfuzzy(self):
+        ''' Bind to libfuzzy thanks to ctypes.
+        This will create the "fuzzy_hash_buf" and
+        the "fuzzy_compare" methods
+        '''
+        try:
+            fuzzy = ctypes.CDLL('libfuzzy.so')
+        except OSError:
+            print('[-] Please check that you installed libfuzzy')
+            sys.exit(1)
+
+        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
+        self.__fuzzy_hash_buf.restype = ctypes.c_int
+        self.__fuzzy_hash_buf.argtypes = [
+            ctypes.c_char_p, #buf
+            ctypes.c_uint32, #buf_len
+            ctypes.c_char_p, #result
+        ]
+        self.__fuzzy_compare = fuzzy.fuzzy_compare
+        self.__fuzzy_compare.restype = ctypes.c_int
+        self.__fuzzy_compare.argtypes = [
+            ctypes.c_char_p, #sig1
+            ctypes.c_char_p, #sig2
+        ]
+
+    def populate(self, path):
+        ''' Computes fuzzy hashes of files under the given path,
+        and store them in the dict self.samples with the form dict {name: fuzzy_hash}
+        @param path Path containing the samples
+        '''
+        for root, _, filenames in os.walk(path):
+            for filename in filenames:
+                full_path = os.path.join(root, filename)
+                self.samples[full_path] = self.__hash_from_file(full_path)
+
+    def __hash_from_file(self, path):
+        ''' Return the hash of the given file
+        @param path Path to the file to hash
+        @ret Fuzzy hash of the given file
+        '''
+        with open(path, 'rb') as f:  # binary mode: hash the exact bytes
+            content = f.read()
+        out = ctypes.create_string_buffer('\x00' * FUZZY_MAX_RESULT)
+        self.__fuzzy_hash_buf(content, len(content), out)
+        return out.value
+
+    def evaluate(self, path):
+        ''' Compare the hash of the given path to every samples one.
+        @ret A list of [name, score] with a non-zero score, best match first
+        '''
+        fuzzy_hash = self.__hash_from_file(path)
+
+        lst = list()
+        for f in self.samples:
+            score = self.__fuzzy_compare(fuzzy_hash, self.samples[f])
+            if score:
+                lst.append([f, score])
+        return sorted(lst, key=lambda item: item[1], reverse=True)
+
+    def is_malware(self, path):
+        ''' Return True if the best score against the samples exceeds 90 '''
+        max_score = 0
+        fuzzy_hash = self.__hash_from_file(path)
+        for f in self.samples:
+            score = self.__fuzzy_compare(fuzzy_hash, self.samples[f])
+            if score > max_score:
+                max_score = score
+                logging.info('fuzzy score for ' + path + ' matches ' + f + ' at ' + str(score) + '%')
+
+        return max_score > 90
+
+
diff --git a/modules/scanmodule.py b/modules/scanmodule.py
new file mode 100644
index 0000000..6ace387
--- /dev/null
+++ b/modules/scanmodule.py
@@ -0,0 +1,56 @@
+import ConfigParser
+import pickle
+
+
+class ScanModule(object):
+    ''' Base class of the scan modules, configured through modules.conf '''
+    def __init__(self):
+        self.config = ConfigParser.ConfigParser()
+        self.config.read('modules.conf')
+        self.samples = dict()
+        # A missing section or option simply means "not configured"
+        try:
+            self.populate(self.config.get(self.name, 'samples'))
+        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
+            pass
+
+        try:
+            self.load(self.config.get(self.name, 'persistence'))
+        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
+            pass
+
+    def is_disable(self):
+        try:
+            return self.config.getboolean(self.name, 'disable')
+        except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
+            return False
+
+    def evaluate(self, path):
+        ''' Return in percent, the probability that
+        the file is a malware (must be overridden by subclasses)
+        @param path File to evaluate
+        '''
+        raise NotImplementedError
+
+    def populate(self, path):
+        ''' Populate the module's internal database
+        with data from the given path (must be overridden by subclasses)
+        @param path Path to the data
+        '''
+        raise NotImplementedError
+
+    def load(self, path):
+        ''' Unpickle the given path, and update the samples dict with it.
+        Unpickling can run arbitrary code: only load trusted files.
+        @param path Path to the dict to unpickle
+        '''
+        with open(path, 'rb') as f:
+            self.samples.update(pickle.load(f))
+
+    def save(self, path):
+        ''' Save the database to the given file
+        @param path Path where to save the database
+        '''
+        with open(path, 'wb') as f:
+            pickle.dump(self.samples, f)
+
diff --git a/modules/whitelist.py b/modules/whitelist.py
new file mode 100644
index 0000000..587b392
--- /dev/null
+++ b/modules/whitelist.py
@@ -0,0 +1,46 @@
+import os
+import hashlib
+import scanmodule
+
+def main():  # builds and returns a new HashWhitelist scan module
+    return HashWhitelist()
+
+class HashWhitelist(scanmodule.ScanModule):
+    name = 'hashwhitelist'
+    def __sha1(self, path):
+        ''' Return the hexadecimal SHA-1 digest of the given file
+        @param path Path to the file to hash
+        '''
+        with open(path, 'rb') as f:  # binary mode: hash the exact bytes
+            return hashlib.sha1(f.read()).hexdigest()
+
+    def evaluate(self, path):
+        ''' List the whitelisted samples that have the same SHA-1 as the file
+        @param path File to evaluate
+        @ret A list of [sample_name, 100], one entry per matching sample
+        '''
+        sha1 = self.__sha1(path)
+        lst = list()
+        for f in self.samples:
+            if sha1 == self.samples[f]:
+                lst.append([f, 100])
+        return lst
+
+    def is_malware(self, path):
+        ''' Return False if the file is whitelisted, True otherwise
+        @param path File to check
+        '''
+        sha1 = self.__sha1(path)
+        for f in self.samples:
+            if sha1 == self.samples[f]:
+                return False
+        return True
+
+    def populate(self, path):
+        ''' Store the SHA-1 of every file found under the given path
+        @param path Directory holding the whitelisted files
+        '''
+        for root, _, filenames in os.walk(path):
+            for filename in filenames:
+                full_path = os.path.join(root, filename)
+                self.samples[full_path] = self.__sha1(full_path)
-- 
cgit v1.3