From 6beeeebe3c43f0643e521139d3f8b1ff4a7f3059 Mon Sep 17 00:00:00 2001
From: Julien Voisin
Date: Thu, 5 Mar 2015 15:36:22 +0100
Subject: Yara is cooler than Python

---
 modules/__init__.py    |   0
 modules/entropy.py     |  56 ------------
 modules/grep_count.py  | 234 -------------------------------------------------
 modules/levenshtein.py |  73 ---------------
 modules/libfuzzy.py    |  98 ---------------------
 modules/scanmodule.py  |  56 ------------
 modules/whitelist.py   |  46 ----------
 7 files changed, 563 deletions(-)
 delete mode 100644 modules/__init__.py
 delete mode 100644 modules/entropy.py
 delete mode 100644 modules/grep_count.py
 delete mode 100644 modules/levenshtein.py
 delete mode 100644 modules/libfuzzy.py
 delete mode 100644 modules/scanmodule.py
 delete mode 100644 modules/whitelist.py

(limited to 'modules')

diff --git a/modules/__init__.py b/modules/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/modules/entropy.py b/modules/entropy.py
deleted file mode 100644
index 48b2924..0000000
--- a/modules/entropy.py
+++ /dev/null
@@ -1,56 +0,0 @@
-''' This module uses shannon's Entropy to detect packed malwares
-'''
-import os
-import math
-import logging
-logging.basicConfig(level=logging.DEBUG)
-
-import scanmodule
-
-def main():
-    return Entropy()
-
-class Entropy(scanmodule.ScanModule):
-    name = 'entropy'
-    def populate(self, path):
-        pass
-    def load(self, path):
-        pass
-    def save(self, path):
-        pass
-
-    def __compute_score(self, path):
-        return (self.__entropy(path) - 5) * 100
-
-    def is_malware(self, path):
-        score = self.__compute_score(path)
-        logging.info('Entropy score for ' + path + ' : ' + str(score))
-        return  score > 75
-
-    def evaluate(self, path):
-        ''' Computes an arbitraty score for the given path
-        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
-        '''
-        score = self.__compute_score(path)
-        if score > 0:
-            return [['MALWARE', score],]
-        return None
-
-    def __entropy(self, path):
-        ''' Computes shannon's entropy for the given file
-        @param path Path to the file
-        '''
-        # Computes the frequency of each byte in the file
-        fsize = max(float(os.path.getsize(path)), 1.0)
-
-        freq = [0] * 256
-        with open(path, 'rb') as f:
-            for c in f.read():
-                freq[ord(c)] += 1
-
-        entropy = 0.0
-        for f in freq:
-            if f:
-                f /= fsize
-                entropy += f * math.log(f, 2)
-        return -entropy
diff --git a/modules/grep_count.py b/modules/grep_count.py
deleted file mode 100644
index 2431960..0000000
--- a/modules/grep_count.py
+++ /dev/null
@@ -1,234 +0,0 @@
-''' This module count the occurences of dodgy terms present in a file
-'''
-import os
-import logging
-logging.basicConfig(level=logging.DEBUG)
-
-import scanmodule
-
-def main():
-    return GrepCount()
-
-class GrepCount(scanmodule.ScanModule):
-    name = 'grep count'
-
-    # ranked from 1 to 10, 10 being EVIL
-    # Also, 100 is awarded to MEGA-DUH-OBVIOUS things.
-    dodgy_terms = {
-            '$GLOBALS': 6,
-            'WWW-Authenticate': 7,
-            'ZipArchive': 6,
-            'apache_get_modules': 8,
-            'assert': 5,
-            'base64_decode': 7,
-            'bzdecompress': 7,
-            'chmod': 8,
-            'curl_init("file://': 100,  # safe mode bypass exploit
-            #'dl': 10,
-            #'exec': 10,
-            'eval(': 10,
-            'eval(base64_decode': 100,
-            'eval($_GET': 100,
-            'eval($_POST': 100,
-            'eval($_REQUEST': 100,
-            'eval(base64_decode': 100,
-            'eval(gzinflate': 100,
-            'file_get_contents': 6,
-            'fpassthru': 100,
-            'fsockopen': 7,
-            'ftp_connect': 7,
-            'ftp_exec': 7,
-            'ftp_login': 7,
-            'function_exists': 4,
-            'get_current_user': 10,
-            'getcwd': 8,
-            'getenv': 9,
-            'getmxrr': 5,
-            'getmygid': 10,
-            'getmygid': 10,
-            'getmyinode': 10,
-            'getmypid': 10,
-            'getmyuid': 10,
-            'gzinflate': 7,
-            'gzinflate(base64_decode(': 100,
-            'gzuncompress': 7,
-            'ini_get': 6,
-            'ini_set': 6,
-            'is_readable': 10,
-            'mysql_get_client_info': 7,
-            'open_basedir': 9,
-            'passthru': 10,
-            'passthru($_GET': 100,
-            'passthru($_POST': 100,
-            'passthru($_REQUEST': 100,
-            'pclose': 9,
-            'pcntl_fork': 10,
-            'php_logo_guid': 10,
-            'php_uname': 8,
-            'phpcredits': 10,
-            'phpinfo': 10,
-            'phpversion': 5,
-            'pnctl_exec': 10,
-            'pnctl_fork': 10,
-            'popen': 10,
-            'posix_getegid': 10,
-            'posix_geteuid': 10,
-            'posix_getgetgruid': 10,
-            'posix_getpwuid': 10,
-            'posix_kill': 10,
-            'posix_mkfifo': 10,
-            'posix_setgid': 10,
-            'posix_setpgid': 10,
-            'posix_setsid': 10,
-            'posix_setuid': 10,
-            'posix_uname': 10,
-            'php://input': 7,
-            'proc_close': 10,
-            'proc_get_status': 10,
-            'proc_nice': 10,
-            'proc_open': 10,
-            'proc_terminate': 10,
-            'putenv': 10,
-            'putenv("PHP': 100,  # Shellshock exploit
-            'putenv(\'PHP': 100,  # Shellshock exploit
-            'safe_mode': 10,
-            'shell_exec': 10,
-            'show_source': 10,
-            'socket_create(AF_INET, SOCK_STREAM, SOL_TCP)': 10,  # Used for SYN flood
-            'symlink': 8,
-            'system(': 9,
-            'system($_GET': 100,
-            'system($_POST': 100,
-            'system($_REQUEST': 100,
-            'win_shell_execute': 10,
-            'win_create_service': 100,
-            'wscript': 8,
-            'zend_logo_guid': 10,
-            'zend_thread_id': 9,
-            'zend_version': 9,
-            }
-
-    dodgy_terms.update({
-            '/bin/bash ': 100,
-            '/bin/sh ': 100,
-            '/etc/hosts': 100,
-            '/etc/passwd': 100,
-            '/etc/resolv.conf ': 100,
-            '/etc/shadow': 100,
-            '/etc/syslog.conf': 100,
-            '/proc/cpuinfo': 10,
-            '/tmp': 8,
-            '/var/cpanel/accounting.log': 100,
-            'IRC server': 100,
-            'LD_PRELOAD': 100,
-            'PRIVMSG': 100,
-            'Safe Mod Bypass': 100,
-            'Shell ': 9,
-            '\\x': 3,  # Shellcodes
-            '\x00/../': 100,  # safe mode bypass
-            'backdoor': 10,
-            'bypass': 8,
-            'chkrootkit': 100,
-            'chmod 777': 7,
-            'cmd.exe': 100,
-            'dir /OG /X': 100,
-            'find . -type f': 100,
-            'gcc ': 8,
-            'id_rsa': 100,
-            'ipconfig /all': 100,
-            'jschl_vc': 100,  # Cloudflare bypass
-            'jschl_answer': 100,  # Cloudflare bypass
-            'kernel32.dll': 100,
-            'ls -la': 100,
-            'milw0rm': 100,
-            'my.cnf': 100,
-            'my.conf': 100,
-            'nc -l': 100,
-            'netstat ': 100,
-            'file:file://': 100,  # basedir bypass
-            'portsentry': 100,
-            'proftpd.conf': 100,
-            'ps -aux': 100,
-            'rkhunter': 100,
-            'shellcode': 100,
-            'slowloris': 100,
-            'snort': 100,
-            'system32': 9,
-            'tripwire': 100,
-            'uname -a': 100,
-            'wget': 8,
-            'WinExec': 10,
-            })
-
-    dodgy_terms.update({
-        '/cdn-cgi/l/chk_jschl': 100,  # Cloudflare bypass for DDoS'ing
-        'Antichat Shell': 100,
-        'Cr@zy_King': 100,
-        'KAdot@ngs.ru': 100,
-        'Kacak': 100,
-        'KingDefacer': 100,
-        'SimAttacker': 100,
-        'SoldiersOfAllah': 100,
-        'ak74-team.net': 100,
-        'alturks.com': 100,
-        'egy_spider' : 100,
-        'egyspider.eu' : 100,
-        'exploit-db.com': 100,
-        'forever5pi': 100,
-        'grayhatz.org': 100,
-        'kacaq.blogspot.com': 100,
-        'locus7s.com': 100,
-        'michaeldaw.org': 100,
-        'milw0rm.com': 100,
-        'pentestmonkey': 100,
-        'r57.biz': 100,
-        'r57shell.net': 100,
-        'rootshell-team.info': 100,
-        'simorgh': 100,
-        'thecrowsrew.org': 100,
-        'vnhacker.org': 100,
-        'xdevil.org': 100,
-        'zehirhacker': 100,
-        '~z0mbie': 100,
-        })
-
-    def populate(self, path):
-        ''' Does nothing :<
-        '''
-        pass
-
-    def evaluate(self, path):
-        ''' Check the given file against a list of know dodgy strings.
-        The calculation formulae is empirical.
-        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
-        '''
-        fsize = os.path.getsize(path)
-        if not fsize:
-            return None
-
-        content = ''
-        with open(path, 'r') as f:
-            content = f.read()
-
-        score = 0
-        for key,data in self.dodgy_terms.iteritems():
-            nb = content.find(key) * data
-            if nb > 0:
-                score += nb
-        score /= fsize
-
-        logging.info('Grep score for ' + path + ' : ' + str(score))
-
-        if score > 75:
-            return [['MALWARE', min(score, 100)],]
-        return None
-
-    def load(self, path):
-        pass
-
-    def save(self, path):
-        pass
-
-    def is_malware(self, path):
-        return self.evaluate(path) is not None
-
diff --git a/modules/levenshtein.py b/modules/levenshtein.py
deleted file mode 100644
index 2e854e2..0000000
--- a/modules/levenshtein.py
+++ /dev/null
@@ -1,73 +0,0 @@
-'''
-This modules has a super-awful complexity (something along n^4),
-so I'm quite sure that you don't want to run it by default ;)
-
-Anyway, this modules computes the Levenshtein distance between samples of malwares
-and files to check, to find similarities.
-'''
-import os
-
-import scanmodule
-
-def main():
-    return Levenshtein()
-
-class Levenshtein(scanmodule.ScanModule):
-    name = 'levenshtein'
-    def populate(self, path):
-        ''' We can't really populate the database with Levenshtein scores,
-        but we can speedup the calculation by storing files lenghts
-        '''
-        for root, _, filenames in os.walk(path):
-            for filename in filenames:
-                full_path = os.path.join(root, filename)
-                with open(full_path, 'r') as f:
-                    self.samples[full_path] = [os.path.getsize(full_path), f.read().lower()]
-
-    def evaluate(self, path):
-        ''' Compare the hash of the given path to every samples one.
-        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
-        '''
-        file_to_test = path
-        file_size = os.path.getsize(file_to_test)
-
-        lst = list()
-        for sample_name, sample_intel in self.samples.iteritems():
-            if sample_name != file_to_test:
-                score = self.__levenshtein(file_to_test, sample_intel[1])
-                score = score / ((file_size + sample_intel[0]) / 2.0)  # mean value
-                if score > 25:  # if the match is under 10%, we don't care
-                    lst.append([sample_name, score * 10])
-        return sorted(lst, key=lambda lst: lst[1], reverse=True)
-
-    def __levenshtein_file(self, f, b):
-        ''' Computes the Levenshtein's distance between a file and a buffer
-        @param f1 File
-        @param fs2 Buffer
-        @return The levenshtein distance
-        '''
-        with open(f, 'r') as of:
-            return self.__levenshtein(of.read().lower(), b)
-
-    def __levenshtein(self, s1, s2):
-        ''' Computes the Levenshtein's distance between two strings
-        @param s1 First string
-        @param s2 Second string
-        @return The levenshtein distance
-        '''
-
-        if len(s1) < len(s2):  # Minimize computation
-            s1, s2 = s2, s1
-
-        previous_row = range(len(s2) + 1)
-        for i, c1 in enumerate(s1):
-            current_row = [i + 1]
-            for j, c2 in enumerate(s2):
-                insertions = previous_row[j + 1] + 1
-                deletions = current_row[j] + 1
-                substitutions = previous_row[j] + (c1 != c2)
-                current_row.append(min(insertions, deletions, substitutions))
-            previous_row = current_row
-
-        return previous_row[-1]
-
diff --git a/modules/libfuzzy.py b/modules/libfuzzy.py
deleted file mode 100644
index a0d3f15..0000000
--- a/modules/libfuzzy.py
+++ /dev/null
@@ -1,98 +0,0 @@
-''' Ugly-pseudo-bindings to libfuzzy (used by ssdeep) to check
-if a file is similar to a given list of samples
-'''
-
-import os
-import ctypes
-import pickle
-import sys
-import logging
-logging.basicConfig(level=logging.DEBUG)
-
-import scanmodule
-
-
-SPAMSUM_LENGTH = 64
-FUZZY_MAX_RESULT = SPAMSUM_LENGTH + SPAMSUM_LENGTH // 2 + 20
-
-def main():
-    return FuzzyMatcher()
-
-class FuzzyMatcher(scanmodule.ScanModule):
-    name = 'libfuzzy'
-    def __init__(self, samples_path=None, persistence_path=None):
-        self.__initialize_libfuzzy()
-        super(FuzzyMatcher, self).__init__()
-
-    def __initialize_libfuzzy(self):
-        ''' Bind to libfuzzy thanks to ctypes.
-        This will create the "fuzzy_hash_buf" and
-        the "fuzzy_compare" methods
-        '''
-        try:
-            fuzzy = ctypes.CDLL('libfuzzy.so')
-        except OSError:
-            print('[-] Please check that you installed libfuzzy')
-            sys.exit(1)
-
-        self.__fuzzy_hash_buf = fuzzy.fuzzy_hash_buf
-        self.__fuzzy_hash_buf.restype = ctypes.c_int
-        self.__fuzzy_hash_buf.argtypes = [
-            ctypes.c_char_p, #buf
-            ctypes.c_uint32, #buf_len
-            ctypes.c_char_p, #result
-        ]
-        self.__fuzzy_compare = fuzzy.fuzzy_compare
-        self.__fuzzy_compare.restype = ctypes.c_int
-        self.__fuzzy_compare.argtypes = [
-            ctypes.c_char_p, #sig1
-            ctypes.c_char_p, #sig2
-        ]
-
-    def populate(self, path):
-        ''' Computes fuzzy hashes of files under the given path,
-        and store them in the dict self.samples with the form dict {name: fuzzy_hash}
-        @param path Path containing the samples
-        '''
-        for root, _, filenames in os.walk(path):
-            for filename in filenames:
-                full_path = os.path.join(root, filename)
-                self.samples[full_path] = self.__hash_from_file(full_path)
-
-    def __hash_from_file(self, path):
-        ''' Return the hash of the given file
-        @param path Path to the file to hash
-        @ret Fuzzy hash of the given file
-        '''
-        with open(path, 'r') as f:
-            out = ctypes.create_string_buffer('\x00' * FUZZY_MAX_RESULT)
-            content = f.read()
-            self.__fuzzy_hash_buf(content, len(content), out)
-            return out.value
-
-    def evaluate(self, path):
-        ''' Compare the hash of the given path to every samples one.
-        @ret A sorted list of the form [name, match_in_percent_superior_to_zero]
-        '''
-        fuzzy_hash = self.__hash_from_file(path)
-
-        lst = list()
-        for f in self.samples:
-            score = self.__fuzzy_compare(fuzzy_hash, self.samples[f])
-            if score:
-                lst.append([f, score])
-        return sorted(lst, key=lambda lst: lst[1], reverse=True)
-
-    def is_malware(self, path):
-        max_score = 0
-        fuzzy_hash = self.__hash_from_file(path)
-
-        for f in self.samples:
-            score = self.__fuzzy_compare(fuzzy_hash, self.samples[f])
-            if score > max_score:
-                score = max_score
-                logging.info('fuzzy score for ' + path + ' matches ' + f + ' at ' + str(score) + '%%')
-
-        return max_score > 90
-
-
diff --git a/modules/scanmodule.py b/modules/scanmodule.py
deleted file mode 100644
index 6ace387..0000000
--- a/modules/scanmodule.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import ConfigParser
-import pickle
-
-
-class ScanModule(object):
-    def __init__(self):
-        self.config = ConfigParser.ConfigParser()
-        self.config.read('modules.conf')
-
-        self.samples = dict()
-
-        try:
-            self.populate(self.config.get(self.name, 'samples'))
-        except ConfigParser.NoOptionError:
-            pass
-
-        try:
-            self.load(self.config.get(self.name, 'persistence'))
-        except ConfigParser.NoOptionError:
-            pass
-
-    def is_disable(self):
-        try:
-            return self.config.getboolean(self.name, 'disable')
-        except ConfigParser.NoOptionError:
-            return False
-
-    def evaluate(self, path):
-        ''' Return in percent, the probability that
-        the file is a malware
-        @param path File to evaluate
-        '''
-        raise NotImplemented
-
-    def populate(self, path):
-        ''' Populate the module's internal database
-        with data from the given path
-        @param path Path to the data
-        '''
-        raise NotImplemented
-
-
-    def load(self, path):
-        ''' Unpickle the given path, and updates the samples dict with it.
-        @param path Path to the dict to unpickle
-        '''
-        with open(path, 'r') as f:
-            self.samples.update(pickle.load(f))
-
-    def save(self, path):
-        ''' Save the database to the given file
-        @param path Path where to save the database
-        '''
-        with open(path, 'w') as f:
-            pickle.dump(self.samples, f)
-
diff --git a/modules/whitelist.py b/modules/whitelist.py
deleted file mode 100644
index 587b392..0000000
--- a/modules/whitelist.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os
-import hashlib
-import scanmodule
-
-def main():
-    return HashWhitelist()
-
-class HashWhitelist(scanmodule.ScanModule):
-    name = 'hashwhitelist'
-    def evaluate(self, path):
-        ''' Return in percent, the probability that
-        the file is a malware
-        @param path File to evaluate
-        '''
-        sha1 = ''
-        with open(path, 'r') as f:
-            sha1 = hashlib.sha1(f.read()).hexdigest()
-
-        lst = list()
-        for f in self.samples:
-            if sha1 == self.samples[f]:
-                lst.append([f, 100])
-        return sorted(lst, key=lambda lst: lst[1], reverse=True)
-
-    def is_malware(self, path):
-        ''' Return False if the file is whitelisted
-        '''
-        sha1 = ''
-        with open(path, 'r') as f:
-            sha1 = hashlib.sha1(f.read()).hexdigest()
-
-        for f in self.samples:
-            if sha1 == self.samples[f]:
-                return False
-        return True
-
-    def populate(self, path):
-        ''' Populate the module's internal database
-        with data from the given path
-        @param path Path to the data
-        '''
-        for root, _, filenames in os.walk(path):
-            for filename in filenames:
-                full_path = os.path.join(root, filename)
-                with open(full_path, 'r') as f:
-                    self.samples[full_path] = hashlib.sha1(f.read()).hexdigest()
-- 
cgit v1.3