summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMathieu Deous2016-07-26 18:42:53 +0200
committerMathieu Deous2016-07-28 17:56:19 +0200
commitf8228c63bd13aec549a901d9600b17873c4850be (patch)
treea6c8fd240e417f6de5c624b78917b66d2ab12157
parent216702a22dac24e54c88404cf9cd38bca4f0645b (diff)
create script to create whitelists for all versions of given CMS
-rwxr-xr-xphp-malware-finder/utils/mass_whitelist.py181
1 file changed, 181 insertions, 0 deletions
diff --git a/php-malware-finder/utils/mass_whitelist.py b/php-malware-finder/utils/mass_whitelist.py
new file mode 100755
index 0000000..3dd4769
--- /dev/null
+++ b/php-malware-finder/utils/mass_whitelist.py
@@ -0,0 +1,181 @@
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4import re
5import sys
6import tarfile
7from copy import copy
8from collections import OrderedDict
9from hashlib import sha1
10from urllib2 import urlopen, HTTPError
11from StringIO import StringIO
12
13import yara
14
# Command-line help text printed on bad invocation; %(prog)s is replaced
# by the script name (sys.argv[0]) at module load time.
USAGE = """
USAGE: %(prog)s <NAME> <URL_PATTERN> [<MAJOR> [<MINOR> [<PATCH>]]]

Options:
    NAME : name of the CMS/whatever being whitelisted
    URL_PATTERN : download URL with __version__ as a version placeholder
    MAJOR : minimum and maximum major version to crawl (eg: 1-8, 8)
    MINOR : minimum and maximum minor version to crawl
    PATCH : minimum and maximum patch version to crawl

Examples:
    %(prog)s drupal https://ftp.drupal.org/files/projects/drupal-__version__.tar.gz 9 50
    %(prog)s drupal https://ftp.drupal.org/files/projects/drupal-__version__.tar.gz 4-9 1-50
    %(prog)s wordpress https://wordpress.org/wordpress-__version__.tar.gz 4 15
""" % {'prog': sys.argv[0]}
30
31
class Opts:
    """Namespace holding the script's configuration as class attributes.

    DEFAULT_MIN / DEFAULT_MAX bound a version range when none is given on
    the command line.  YARA_RULES is compiled once, when the module is
    imported.  The remaining UPPER_CASE attributes (CMS_NAME, URL_PATTERN,
    MIN_*/MAX_*) are attached by the argument-parsing code below.
    """
    DEFAULT_MIN = 0
    DEFAULT_MAX = 99
    YARA_RULES = yara.compile('../php.yar', includes=True, error_on_warning=True)

    @classmethod
    def to_str(cls):
        """Return a debug string listing every UPPER_CASE option."""
        pairs = ['%s=%s' % (name, getattr(cls, name))
                 for name in cls.__dict__ if name.isupper()]
        return '<Opts(%s)>' % ' '.join(pairs)
44
45
def extract_version_arg(index, default_min=None, default_max=None):
    """Parse the optional version-range argument at sys.argv[index].

    The argument may be either a "MIN-MAX" range (e.g. "1-8") or a single
    value (e.g. "8") which is taken as the maximum only, leaving the
    minimum at its default.  A missing argument yields the defaults.

    Args:
        index: position in sys.argv to read (may be beyond argv's end).
        default_min: fallback minimum; Opts.DEFAULT_MIN when None.
        default_max: fallback maximum; Opts.DEFAULT_MAX when None.

    Returns:
        (min_ver, max_ver) tuple of ints.

    Raises:
        ValueError: if the argument is not an integer or "int-int" range.
    """
    if default_min is None:
        default_min = Opts.DEFAULT_MIN
    if default_max is None:
        default_max = Opts.DEFAULT_MAX
    min_ver, max_ver = default_min, default_max
    if len(sys.argv) > index:  # argument actually present
        if '-' in sys.argv[index]:
            min_ver, max_ver = map(int, sys.argv[index].split('-'))
        else:
            max_ver = int(sys.argv[index])
    return min_ver, max_ver
54
55
def generate_whitelist(version):
    """Download the archive for *version* and hash the files YARA flags.

    Builds the download URL from Opts.URL_PATTERN (replacing the
    ``__version__`` placeholder), fetches the .tar.gz archive fully into
    memory, and scans every regular file inside it against the compiled
    Opts.YARA_RULES.

    Returns:
        dict mapping archive-relative path -> SHA1 hex digest for each
        file that matched at least one rule (possibly empty), or None
        when the download failed (HTTPError or non-200 response).
    """
    rules = {}

    # download archive
    dl_failed = False
    download_url = Opts.URL_PATTERN.replace('__version__', version)
    # same URL with the version highlighted (ANSI bold yellow) for display only
    download_url_str = Opts.URL_PATTERN.replace('__version__', '\x1b[1;33m%s\x1b[0m' % version)
    print "[+] Downloading %s... " % download_url_str,
    try:
        resp = urlopen(download_url)
        resp_code = resp.code
    except HTTPError as err:
        # keep the HTTP status so the failure message can report it
        dl_failed = True
        resp_code = err.code
    if dl_failed or (resp_code != 200):
        print "\x1b[1;31mFAILED (%d)\x1b[0m" % resp_code
        return None
    data = StringIO(resp.read())
    data.seek(0)
    print "\x1b[1;32mOK\x1b[0m"

    # extract archive and check against YARA signatures (in-memory)
    tar = tarfile.open(mode='r:gz', fileobj=data)
    for entry in tar.getnames():
        entry_fd = tar.extractfile(entry)
        if entry_fd is None:
            # directories and special members yield None; skip them
            continue
        entry_data = entry_fd.read()
        matches = Opts.YARA_RULES.match(data=entry_data, fast=True)
        if matches:
            # drop the archive's top-level directory from the stored path
            rules['/'.join(entry.split('/')[1:])] = sha1(entry_data).hexdigest()

    return rules
89
90
# whitelists: version string -> {relative path: sha1 digest}, kept in
# discovery order so the generated rule lists versions chronologically
whitelists = OrderedDict()

# need a name and a URL pattern, plus up to three optional version ranges
if not 3 <= len(sys.argv) <= 6:
    print(USAGE)
    sys.exit(1)

# stash parsed arguments on the Opts class for global access
Opts.CMS_NAME = sys.argv[1]
Opts.URL_PATTERN = sys.argv[2]
for part, arg_index in (('MAJOR', 3), ('MINOR', 4), ('PATCH', 5)):
    low, high = extract_version_arg(arg_index)
    setattr(Opts, 'MIN_' + part, low)
    setattr(Opts, 'MAX_' + part, high)
105
# loop over possible versions
for vmajor in range(Opts.MIN_MAJOR, Opts.MAX_MAJOR + 1):
    # download without vminor and vpatch (but ignore if it doesn't exist)
    version = "%d" % vmajor
    rules = generate_whitelist(version)
    # keep the version only if the download worked AND something matched
    if (rules is not None) and rules:
        whitelists[version] = rules

    # has_mversion: at least one "major.minor" download succeeded so far
    # first_mloop: True only during the first minor iteration
    has_mversion = False
    first_mloop = True
    for vminor in range(Opts.MIN_MINOR, Opts.MAX_MINOR + 1):
        # download without vpatch (but ignore if it doesn't exist)
        version = "%d.%d" % (vmajor, vminor)
        rules = generate_whitelist(version)
        if rules is not None:
            has_mversion = True
            if rules:
                whitelists[version] = rules
        # same early-exit rule as documented for the patch loop below
        if (rules is None) and (has_mversion or not first_mloop):
            break
        first_mloop = False

        # has_pversion / first_ploop mirror the minor-loop flags above
        has_pversion = False
        first_ploop = True
        for vpatch in range(Opts.MIN_PATCH, Opts.MAX_PATCH + 1):
            version = "%d.%d.%d" % (vmajor, vminor, vpatch)
            rules = generate_whitelist(version)
            if rules is not None:
                has_pversion = True
                if rules:
                    whitelists[version] = rules
            # break loop if download failed and:
            # - a version has already been found during this loop
            # - this is the 2nd iteration (if a version wasn't found, it means download failed twice)
            if (rules is None) and (has_pversion or not first_ploop):
                break
            first_ploop = False
143
# Remove duplicate entries: a (filename, digest) pair already whitelisted
# by an earlier version is dropped from later versions; versions left with
# no rules at all are removed entirely.
# NOTE(review): the original iterated copy(whitelists.items()) and the live
# rules.items() while deleting from those same dicts — that only works with
# Python 2's list-returning items(); list() snapshots are safe everywhere,
# and a set makes the membership test O(1) instead of O(n).
known_files = set()
for version in list(whitelists):
    rules = whitelists[version]
    for filename in list(rules):
        rtuple = (filename, rules[filename])
        if rtuple in known_files:
            del rules[filename]
        else:
            known_files.add(rtuple)
    if not rules:  # every rule of this version was a duplicate
        del whitelists[version]
157
# build final rule: one big OR-chain of sha1 comparisons, one comment-header
# and one group of hash lines per version; only the very last hash line in
# the whole chain omits the trailing "or"
indent = 8 * ' '
conditions = []
last_version_idx = len(whitelists) - 1
for v_idx, (version, rules) in enumerate(whitelists.items()):
    chunk = ['%s/* %s %s */\n' % (indent, Opts.CMS_NAME.title(), version)]
    last_file_idx = len(rules) - 1
    for f_idx, (filename, digest) in enumerate(rules.items()):
        if (v_idx == last_version_idx) and (f_idx == last_file_idx):
            # final hash of the final version: no "or" connector
            chunk.append('%shash.sha1(0, filesize) == "%s" // %s\n' % (indent, digest, filename))
        else:
            chunk.append('%shash.sha1(0, filesize) == "%s" or // %s\n' % (indent, digest, filename))
    conditions.append(''.join(chunk))

final_rule = """
import "hash"

rule %(name)s
{
    condition:
%(conditions)s
}
""" % {'name': Opts.CMS_NAME, 'conditions': '\n'.join(conditions)}
print(final_rule)