import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn import preprocessing
import string
from urllib.parse import urlparse, parse_qs
# Comma-separated field names used as the column names of the CSV loaded
# below.  Written as adjacent string literals, one per protocol prefix, which
# Python joins into a single string identical to the one-line form.
columns_str = (
    'frame.number,frame.len,frame.time,frame.time_epoch,frame.protocols,'
    'eth.src,eth.dst,eth.type,'
    'ip.src,ip.dst,ip.len,ip.ttl,ip.flags,ip.frag_offset,ip.proto,ip.version,ip.dsfield,ip.checksum,'
    'tcp.srcport,tcp.dstport,tcp.len,tcp.seq,tcp.ack,tcp.flags,tcp.flags.syn,tcp.flags.ack,tcp.flags.fin,tcp.flags.reset,tcp.window_size,tcp.checksum,tcp.stream,'
    'udp.srcport,udp.dstport,udp.length,udp.checksum,'
    'icmp.type,icmp.code,icmp.checksum,'
    'http.request.method,http.request.uri,http.request.version,http.request.full_uri,http.response.code,http.user_agent,http.content_length_header,http.content_type,http.cookie,http.host,http.referer,http.location,http.authorization,http.connection,'
    'dns.qry.name,dns.qry.type,dns.qry.class,dns.flags.response,dns.flags.recdesired,dns.flags.rcode,dns.resp.ttl,dns.resp.len,'
    'smtp.req.command,smtp.data.fragment,'
    'pop.request.command,pop.response,'
    'imap.request.command,imap.response,'
    'ftp.request.command,ftp.request.arg,ftp.response.code,ftp.response.arg,'
    'ipv6.src,ipv6.dst,ipv6.plen,'
    'alert'
)
# One list entry per field name, in the order given above.
data_columns = columns_str.split(',')
data_columns
['frame.number',
'frame.len',
'frame.time',
'frame.time_epoch',
'frame.protocols',
'eth.src',
'eth.dst',
'eth.type',
'ip.src',
'ip.dst',
'ip.len',
'ip.ttl',
'ip.flags',
'ip.frag_offset',
'ip.proto',
'ip.version',
'ip.dsfield',
'ip.checksum',
'tcp.srcport',
'tcp.dstport',
'tcp.len',
'tcp.seq',
'tcp.ack',
'tcp.flags',
'tcp.flags.syn',
'tcp.flags.ack',
'tcp.flags.fin',
'tcp.flags.reset',
'tcp.window_size',
'tcp.checksum',
'tcp.stream',
'udp.srcport',
'udp.dstport',
'udp.length',
'udp.checksum',
'icmp.type',
'icmp.code',
'icmp.checksum',
'http.request.method',
'http.request.uri',
'http.request.version',
'http.request.full_uri',
'http.response.code',
'http.user_agent',
'http.content_length_header',
'http.content_type',
'http.cookie',
'http.host',
'http.referer',
'http.location',
'http.authorization',
'http.connection',
'dns.qry.name',
'dns.qry.type',
'dns.qry.class',
'dns.flags.response',
'dns.flags.recdesired',
'dns.flags.rcode',
'dns.resp.ttl',
'dns.resp.len',
'smtp.req.command',
'smtp.data.fragment',
'pop.request.command',
'pop.response',
'imap.request.command',
'imap.response',
'ftp.request.command',
'ftp.request.arg',
'ftp.response.code',
'ftp.response.arg',
'ipv6.src',
'ipv6.dst',
'ipv6.plen',
'alert']
# Read the header-less CSV, labelling its columns with data_columns and
# keeping only the request-URI column in memory.
df = pd.read_csv(
    'attack-simulation-http-url.csv',
    header=None,
    names=data_columns,
    usecols=['http.request.uri'],
    low_memory=False,
)
df.head(10)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
0 |
/dvwa/config/ |
1 |
/dvwa/docs/ |
2 |
/dvwa/external/ |
3 |
/mutillidae/ajax/ |
4 |
/mutillidae/classes/ |
5 |
/mutillidae/data/ |
6 |
/mutillidae/documentation/ |
7 |
/mutillidae/images/ |
8 |
/mutillidae/includes/ |
9 |
/mutillidae/javascript/ |
# Preprocessing: derive numeric features from each request URI
def uri_len(uri):
    """Return the number of characters in a request URI.

    Args:
        uri: Value taken from the ``http.request.uri`` column.  Non-string
            values are converted with ``str()`` before being measured.

    Returns:
        int: Length of the URI text, or 0 when the value is missing
        (``None`` or a float NaN).
    """
    # A missing value must not be measured as the literal text "nan"/"None";
    # NaN is the only float that is unequal to itself.
    if uri is None or (isinstance(uri, float) and uri != uri):
        return 0
    return len(str(uri))
# Feature column: character length of every request URI.
request_uris = df['http.request.uri']
df['uri_len'] = request_uris.apply(uri_len)
df.head(10)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
0 |
/dvwa/config/ |
13 |
1 |
/dvwa/docs/ |
11 |
2 |
/dvwa/external/ |
15 |
3 |
/mutillidae/ajax/ |
17 |
4 |
/mutillidae/classes/ |
20 |
5 |
/mutillidae/data/ |
17 |
6 |
/mutillidae/documentation/ |
26 |
7 |
/mutillidae/images/ |
19 |
8 |
/mutillidae/includes/ |
21 |
9 |
/mutillidae/javascript/ |
23 |
def count_uri_segments(uri):
    """Count the '/'-separated segments in the path component of a URI.

    The value is stringified and parsed with ``urlparse``; the query string
    and fragment are therefore not part of the path that is counted.  Leading
    and trailing slashes are stripped before splitting, while empty segments
    between interior slashes (``/a//b``) are still counted.

    Args:
        uri: Value taken from the ``http.request.uri`` column.

    Returns:
        int: Number of path segments; 0 for a missing value (``None`` or
        float NaN), an empty URI, or a path made only of slashes.
    """
    # A missing value must not be parsed as the literal path "nan"/"None".
    if uri is None or (isinstance(uri, float) and uri != uri):
        return 0
    path = urlparse(str(uri)).path.strip('/')
    if not path:
        # ''.split('/') yields [''], which would report one segment for "/".
        return 0
    return len(path.split('/'))
# Feature column: number of path segments in every request URI.
uri_values = df['http.request.uri']
df['uri_segs'] = uri_values.apply(count_uri_segments)
df.head(10)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
uri_segs |
0 |
/dvwa/config/ |
13 |
2 |
1 |
/dvwa/docs/ |
11 |
2 |
2 |
/dvwa/external/ |
15 |
2 |
3 |
/mutillidae/ajax/ |
17 |
2 |
4 |
/mutillidae/classes/ |
20 |
2 |
5 |
/mutillidae/data/ |
17 |
2 |
6 |
/mutillidae/documentation/ |
26 |
2 |
7 |
/mutillidae/images/ |
19 |
2 |
8 |
/mutillidae/includes/ |
21 |
2 |
9 |
/mutillidae/javascript/ |
23 |
2 |
# Characters that are NOT counted as special: ASCII letters, ASCII digits and
# the '/' separator.  Built once as a set instead of being re-concatenated
# for every character examined.
_PLAIN_URI_CHARS = frozenset(string.ascii_letters + string.digits + '/')


def count_special_chars(uri):
    """Count the characters of a URI that are not ASCII alphanumerics or '/'.

    The whole stringified value is scanned, including any query string, so
    characters such as ``?``, ``=``, ``&``, ``.``, ``-``, ``_`` and ``%`` are
    counted, as is every non-ASCII character.

    Args:
        uri: Value taken from the ``http.request.uri`` column; converted with
            ``str()`` before scanning.

    Returns:
        int: Number of special characters found.
    """
    return sum(1 for char in str(uri) if char not in _PLAIN_URI_CHARS)
# Feature column: count of special characters in every request URI.
uri_text = df['http.request.uri']
df['uri_s_chars'] = uri_text.apply(count_special_chars)
df.head(30)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
uri_segs |
uri_s_chars |
0 |
/dvwa/config/ |
13 |
2 |
0 |
1 |
/dvwa/docs/ |
11 |
2 |
0 |
2 |
/dvwa/external/ |
15 |
2 |
0 |
3 |
/mutillidae/ajax/ |
17 |
2 |
0 |
4 |
/mutillidae/classes/ |
20 |
2 |
0 |
5 |
/mutillidae/data/ |
17 |
2 |
0 |
6 |
/mutillidae/documentation/ |
26 |
2 |
0 |
7 |
/mutillidae/images/ |
19 |
2 |
0 |
8 |
/mutillidae/includes/ |
21 |
2 |
0 |
9 |
/mutillidae/javascript/ |
23 |
2 |
0 |
10 |
/mutillidae/passwords/ |
22 |
2 |
0 |
11 |
/mutillidae/phpmyadmin/ |
23 |
2 |
0 |
12 |
/mutillidae/styles/ |
19 |
2 |
0 |
13 |
/mutillidae/test/ |
17 |
2 |
0 |
14 |
/mutillidae/webservices/ |
24 |
2 |
0 |
15 |
/mutillidae/phpmyadmin/examples/ |
32 |
3 |
0 |
16 |
/mutillidae/phpmyadmin/js/ |
26 |
3 |
0 |
17 |
/mutillidae/phpmyadmin/locale/ |
30 |
3 |
0 |
18 |
/mutillidae/phpmyadmin/setup/ |
29 |
3 |
0 |
19 |
/mutillidae/phpmyadmin/themes/ |
30 |
3 |
0 |
20 |
/assets/ |
8 |
1 |
0 |
21 |
/cgi-bin/ |
9 |
1 |
1 |
22 |
/evil/ |
6 |
1 |
0 |
23 |
/gallery2/ |
10 |
1 |
0 |
24 |
NaN |
3 |
1 |
0 |
25 |
/gallery2/main.php |
18 |
2 |
1 |
26 |
/icon/ |
6 |
1 |
0 |
27 |
/images/ |
8 |
1 |
0 |
28 |
/javascript/ |
12 |
1 |
0 |
29 |
/joomla/ |
8 |
1 |
0 |
def check_reserved_characters(uri):
    """Count reserved characters in the path component of a URI.

    The value is stringified and parsed with ``urlparse``; only the resulting
    path, with leading and trailing slashes stripped, is inspected.  Every
    occurrence of one of ``!*'();:@&=+$,/?#[]`` adds one to the total, so
    interior ``/`` separators are part of the count.

    Returns:
        int: Number of reserved characters found in the stripped path.
    """
    trimmed_path = urlparse(str(uri)).path.strip('/')
    total = 0
    # The reserved characters are all distinct, so summing the per-character
    # occurrence counts visits each character of the path exactly once.
    for reserved in "!*'();:@&=+$,/?#[]":
        total += trimmed_path.count(reserved)
    return total
# Feature column: count of reserved characters in the path of every URI.
uri_col = df['http.request.uri']
df['uri_res_chars'] = uri_col.apply(check_reserved_characters)
df.head(30)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
uri_segs |
uri_s_chars |
uri_res_chars |
0 |
/dvwa/config/ |
13 |
2 |
0 |
1 |
1 |
/dvwa/docs/ |
11 |
2 |
0 |
1 |
2 |
/dvwa/external/ |
15 |
2 |
0 |
1 |
3 |
/mutillidae/ajax/ |
17 |
2 |
0 |
1 |
4 |
/mutillidae/classes/ |
20 |
2 |
0 |
1 |
5 |
/mutillidae/data/ |
17 |
2 |
0 |
1 |
6 |
/mutillidae/documentation/ |
26 |
2 |
0 |
1 |
7 |
/mutillidae/images/ |
19 |
2 |
0 |
1 |
8 |
/mutillidae/includes/ |
21 |
2 |
0 |
1 |
9 |
/mutillidae/javascript/ |
23 |
2 |
0 |
1 |
10 |
/mutillidae/passwords/ |
22 |
2 |
0 |
1 |
11 |
/mutillidae/phpmyadmin/ |
23 |
2 |
0 |
1 |
12 |
/mutillidae/styles/ |
19 |
2 |
0 |
1 |
13 |
/mutillidae/test/ |
17 |
2 |
0 |
1 |
14 |
/mutillidae/webservices/ |
24 |
2 |
0 |
1 |
15 |
/mutillidae/phpmyadmin/examples/ |
32 |
3 |
0 |
2 |
16 |
/mutillidae/phpmyadmin/js/ |
26 |
3 |
0 |
2 |
17 |
/mutillidae/phpmyadmin/locale/ |
30 |
3 |
0 |
2 |
18 |
/mutillidae/phpmyadmin/setup/ |
29 |
3 |
0 |
2 |
19 |
/mutillidae/phpmyadmin/themes/ |
30 |
3 |
0 |
2 |
20 |
/assets/ |
8 |
1 |
0 |
0 |
21 |
/cgi-bin/ |
9 |
1 |
1 |
0 |
22 |
/evil/ |
6 |
1 |
0 |
0 |
23 |
/gallery2/ |
10 |
1 |
0 |
0 |
24 |
NaN |
3 |
1 |
0 |
0 |
25 |
/gallery2/main.php |
18 |
2 |
1 |
1 |
26 |
/icon/ |
6 |
1 |
0 |
0 |
27 |
/images/ |
8 |
1 |
0 |
0 |
28 |
/javascript/ |
12 |
1 |
0 |
0 |
29 |
/joomla/ |
8 |
1 |
0 |
0 |
def count_query_parameters(uri):
    """Return how many distinct query parameters a URI carries.

    The query string is extracted with ``urlparse`` and decoded with
    ``parse_qs`` using its default settings; the result is the number of keys
    in the decoded mapping.  A name that appears several times is therefore
    counted once, and parameters with blank values are not counted.

    Returns:
        int: Number of distinct parameter names; 0 when there is no query.
    """
    query_string = urlparse(str(uri)).query
    return len(parse_qs(query_string).keys())
# Feature column: number of query parameters in every request URI.
raw_uris = df['http.request.uri']
df['uri_q_params'] = raw_uris.apply(count_query_parameters)
df.tail(30)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
uri_segs |
uri_s_chars |
uri_res_chars |
uri_q_params |
1964477 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
139 |
2 |
18 |
1 |
5 |
1964478 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
128 |
2 |
17 |
1 |
6 |
1964479 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
123 |
2 |
17 |
1 |
5 |
1964480 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
120 |
2 |
15 |
1 |
6 |
1964481 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
128 |
2 |
18 |
1 |
5 |
1964482 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
149 |
2 |
19 |
1 |
6 |
1964483 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
120 |
2 |
16 |
1 |
5 |
1964484 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
154 |
2 |
20 |
1 |
6 |
1964485 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
149 |
2 |
20 |
1 |
5 |
1964486 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
136 |
2 |
19 |
1 |
6 |
1964487 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
154 |
2 |
21 |
1 |
5 |
1964488 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
141 |
2 |
20 |
1 |
6 |
1964489 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
110 |
2 |
14 |
1 |
5 |
1964490 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
133 |
2 |
18 |
1 |
6 |
1964491 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
115 |
2 |
15 |
1 |
5 |
1964492 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
162 |
2 |
22 |
1 |
6 |
1964493 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
107 |
2 |
13 |
1 |
5 |
1964494 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
167 |
2 |
23 |
1 |
6 |
1964495 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
136 |
2 |
17 |
1 |
5 |
1964496 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
123 |
2 |
16 |
1 |
6 |
1964497 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
141 |
2 |
18 |
1 |
5 |
1964498 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
128 |
2 |
17 |
1 |
6 |
1964499 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
127 |
2 |
17 |
1 |
5 |
1964500 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
120 |
2 |
15 |
1 |
6 |
1964501 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
132 |
2 |
18 |
1 |
5 |
1964502 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
149 |
2 |
19 |
1 |
6 |
1964503 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
124 |
2 |
16 |
1 |
5 |
1964504 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
154 |
2 |
20 |
1 |
6 |
1964505 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
153 |
2 |
20 |
1 |
5 |
1964506 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
158 |
2 |
21 |
1 |
5 |
def max_query_param_length(uri):
    """Return the length of the longest query-parameter value in a URI.

    The query string is extracted with ``urlparse`` and decoded with
    ``parse_qs``, so lengths are measured on the decoded values (after
    percent-decoding).  Every value of a repeated parameter is considered;
    parameter names themselves are not measured.

    Returns:
        int: Length of the longest value, or 0 when the URI has no query
        parameters.
    """
    decoded = parse_qs(urlparse(str(uri)).query)
    value_lengths = [len(item) for items in decoded.values() for item in items]
    return max(value_lengths, default=0)
# Feature column: length of the longest query value in every request URI.
uri_strings = df['http.request.uri']
df['uri_maxq_len'] = uri_strings.apply(max_query_param_length)
df.tail(30)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
uri_segs |
uri_s_chars |
uri_res_chars |
uri_q_params |
uri_maxq_len |
1964477 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
139 |
2 |
18 |
1 |
5 |
38 |
1964478 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
128 |
2 |
17 |
1 |
6 |
18 |
1964479 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
123 |
2 |
17 |
1 |
5 |
22 |
1964480 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
120 |
2 |
15 |
1 |
6 |
14 |
1964481 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
128 |
2 |
18 |
1 |
5 |
25 |
1964482 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
149 |
2 |
19 |
1 |
6 |
35 |
1964483 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
120 |
2 |
16 |
1 |
5 |
21 |
1964484 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
154 |
2 |
20 |
1 |
6 |
38 |
1964485 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
149 |
2 |
20 |
1 |
5 |
42 |
1964486 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
136 |
2 |
19 |
1 |
6 |
22 |
1964487 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
154 |
2 |
21 |
1 |
5 |
45 |
1964488 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
141 |
2 |
20 |
1 |
6 |
25 |
1964489 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
110 |
2 |
14 |
1 |
5 |
15 |
1964490 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
133 |
2 |
18 |
1 |
6 |
21 |
1964491 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
115 |
2 |
15 |
1 |
5 |
18 |
1964492 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
162 |
2 |
22 |
1 |
6 |
42 |
1964493 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
107 |
2 |
13 |
1 |
5 |
14 |
1964494 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
167 |
2 |
23 |
1 |
6 |
45 |
1964495 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
136 |
2 |
17 |
1 |
5 |
35 |
1964496 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
123 |
2 |
16 |
1 |
6 |
15 |
1964497 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
141 |
2 |
18 |
1 |
5 |
38 |
1964498 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
128 |
2 |
17 |
1 |
6 |
18 |
1964499 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
127 |
2 |
17 |
1 |
5 |
22 |
1964500 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
120 |
2 |
15 |
1 |
6 |
14 |
1964501 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
132 |
2 |
18 |
1 |
5 |
25 |
1964502 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
149 |
2 |
19 |
1 |
6 |
35 |
1964503 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
124 |
2 |
16 |
1 |
5 |
21 |
1964504 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
154 |
2 |
20 |
1 |
6 |
38 |
1964505 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
153 |
2 |
20 |
1 |
5 |
42 |
1964506 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
158 |
2 |
21 |
1 |
5 |
45 |
# Remove, in place, every row that holds a missing value in any column.
# The defaults (row-wise, drop on any NaN) are spelled out for clarity.
df.dropna(axis=0, how='any', inplace=True)
df
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
uri_segs |
uri_s_chars |
uri_res_chars |
uri_q_params |
uri_maxq_len |
0 |
/dvwa/config/ |
13 |
2 |
0 |
1 |
0 |
0 |
1 |
/dvwa/docs/ |
11 |
2 |
0 |
1 |
0 |
0 |
2 |
/dvwa/external/ |
15 |
2 |
0 |
1 |
0 |
0 |
3 |
/mutillidae/ajax/ |
17 |
2 |
0 |
1 |
0 |
0 |
4 |
/mutillidae/classes/ |
20 |
2 |
0 |
1 |
0 |
0 |
... |
... |
... |
... |
... |
... |
... |
... |
1964502 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
149 |
2 |
19 |
1 |
6 |
35 |
1964503 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
124 |
2 |
16 |
1 |
5 |
21 |
1964504 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
154 |
2 |
20 |
1 |
6 |
38 |
1964505 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
153 |
2 |
20 |
1 |
5 |
42 |
1964506 |
/awstats/awstats.pl?config=owaspbwa&framename=... |
158 |
2 |
21 |
1 |
5 |
45 |
1914976 rows × 7 columns
# Create an Isolation Forest model.
# contamination=0.1 sets the expected proportion of outliers, which the
# estimator uses to place its decision threshold.  random_state pins the
# random tree construction so that scores and labels are reproducible from
# one run of the notebook to the next.
model = IsolationForest(contamination=0.1, random_state=42)
model
<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: scikit-learn/scikit-learn#21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style>IsolationForest(contamination=0.1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.IsolationForest
IsolationForest(contamination=0.1)
# Train the forest on the six numeric URI features only; the raw URI text
# column is left out of the training matrix.
feature_names = [
    'uri_len',
    'uri_segs',
    'uri_s_chars',
    'uri_res_chars',
    'uri_q_params',
    'uri_maxq_len',
]
required_df = df[feature_names]
model.fit(required_df)
<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: scikit-learn/scikit-learn#21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style>IsolationForest(contamination=0.1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.IsolationForest
IsolationForest(contamination=0.1)
# Score every row with the fitted forest, then store the model's label.
# In the recorded output, rows with a negative score carry the label -1 and
# rows with a positive score carry the label 1.
anomaly_scores = model.decision_function(required_df)
df['scores'] = anomaly_scores
anomaly_labels = model.predict(required_df)
df['anomaly'] = anomaly_labels
df.head(20)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
uri_segs |
uri_s_chars |
uri_res_chars |
uri_q_params |
uri_maxq_len |
scores |
anomaly |
0 |
/dvwa/config/ |
13 |
2 |
0 |
1 |
0 |
0 |
0.090492 |
1 |
1 |
/dvwa/docs/ |
11 |
2 |
0 |
1 |
0 |
0 |
0.087122 |
1 |
2 |
/dvwa/external/ |
15 |
2 |
0 |
1 |
0 |
0 |
0.096042 |
1 |
3 |
/mutillidae/ajax/ |
17 |
2 |
0 |
1 |
0 |
0 |
0.100049 |
1 |
4 |
/mutillidae/classes/ |
20 |
2 |
0 |
1 |
0 |
0 |
0.102356 |
1 |
5 |
/mutillidae/data/ |
17 |
2 |
0 |
1 |
0 |
0 |
0.100049 |
1 |
6 |
/mutillidae/documentation/ |
26 |
2 |
0 |
1 |
0 |
0 |
0.102650 |
1 |
7 |
/mutillidae/images/ |
19 |
2 |
0 |
1 |
0 |
0 |
0.101738 |
1 |
8 |
/mutillidae/includes/ |
21 |
2 |
0 |
1 |
0 |
0 |
0.104347 |
1 |
9 |
/mutillidae/javascript/ |
23 |
2 |
0 |
1 |
0 |
0 |
0.107356 |
1 |
10 |
/mutillidae/passwords/ |
22 |
2 |
0 |
1 |
0 |
0 |
0.106141 |
1 |
11 |
/mutillidae/phpmyadmin/ |
23 |
2 |
0 |
1 |
0 |
0 |
0.107356 |
1 |
12 |
/mutillidae/styles/ |
19 |
2 |
0 |
1 |
0 |
0 |
0.101738 |
1 |
13 |
/mutillidae/test/ |
17 |
2 |
0 |
1 |
0 |
0 |
0.100049 |
1 |
14 |
/mutillidae/webservices/ |
24 |
2 |
0 |
1 |
0 |
0 |
0.104205 |
1 |
15 |
/mutillidae/phpmyadmin/examples/ |
32 |
3 |
0 |
2 |
0 |
0 |
0.138496 |
1 |
16 |
/mutillidae/phpmyadmin/js/ |
26 |
3 |
0 |
2 |
0 |
0 |
0.145323 |
1 |
17 |
/mutillidae/phpmyadmin/locale/ |
30 |
3 |
0 |
2 |
0 |
0 |
0.140898 |
1 |
18 |
/mutillidae/phpmyadmin/setup/ |
29 |
3 |
0 |
2 |
0 |
0 |
0.141864 |
1 |
19 |
/mutillidae/phpmyadmin/themes/ |
30 |
3 |
0 |
2 |
0 |
0 |
0.140898 |
1 |
# Keep only the rows the model labelled -1 and preview the first 50 of them.
is_flagged = df['anomaly'] == -1
anomaly = df[is_flagged]
anomaly.head(50)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
http.request.uri |
uri_len |
uri_segs |
uri_s_chars |
uri_res_chars |
uri_q_params |
uri_maxq_len |
scores |
anomaly |
20 |
/assets/ |
8 |
1 |
0 |
0 |
0 |
0 |
-0.031069 |
-1 |
21 |
/cgi-bin/ |
9 |
1 |
1 |
0 |
0 |
0 |
-0.017994 |
-1 |
22 |
/evil/ |
6 |
1 |
0 |
0 |
0 |
0 |
-0.034518 |
-1 |
23 |
/gallery2/ |
10 |
1 |
0 |
0 |
0 |
0 |
-0.023963 |
-1 |
26 |
/icon/ |
6 |
1 |
0 |
0 |
0 |
0 |
-0.034518 |
-1 |
27 |
/images/ |
8 |
1 |
0 |
0 |
0 |
0 |
-0.031069 |
-1 |
28 |
/javascript/ |
12 |
1 |
0 |
0 |
0 |
0 |
-0.020447 |
-1 |
29 |
/joomla/ |
8 |
1 |
0 |
0 |
0 |
0 |
-0.031069 |
-1 |
30 |
/phpBB2/ |
8 |
1 |
0 |
0 |
0 |
0 |
-0.031069 |
-1 |
31 |
/phpmyadmin/ |
12 |
1 |
0 |
0 |
0 |
0 |
-0.020447 |
-1 |
32 |
/test/ |
6 |
1 |
0 |
0 |
0 |
0 |
-0.034518 |
-1 |
33 |
/test/ |
6 |
1 |
0 |
0 |
0 |
0 |
-0.034518 |
-1 |
34 |
/wordpress/ |
11 |
1 |
0 |
0 |
0 |
0 |
-0.021533 |
-1 |
54 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
55 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
56 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
59 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
61 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
66 |
/phpBB2/login.php?redirect=admin/&sid=eb3e... |
74 |
2 |
6 |
1 |
2 |
32 |
-0.164158 |
-1 |
80 |
/nmaplowercheck1685892421 |
25 |
1 |
0 |
0 |
0 |
0 |
-0.005327 |
-1 |
81 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
82 |
/nmaplowercheck1685892421 |
25 |
1 |
0 |
0 |
0 |
0 |
-0.005327 |
-1 |
83 |
/nmaplowercheck1685892421 |
25 |
1 |
0 |
0 |
0 |
0 |
-0.005327 |
-1 |
84 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
85 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
91 |
/HNAP1 |
6 |
1 |
0 |
0 |
0 |
0 |
-0.034518 |
-1 |
92 |
/HNAP1 |
6 |
1 |
0 |
0 |
0 |
0 |
-0.034518 |
-1 |
95 |
/HNAP1 |
6 |
1 |
0 |
0 |
0 |
0 |
-0.034518 |
-1 |
99 |
/wordpress/wp-login.php?redirect_to=%2Fwordpre... |
62 |
2 |
9 |
1 |
1 |
20 |
-0.110149 |
-1 |
102 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
103 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
104 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
105 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
106 |
/ |
1 |
1 |
0 |
0 |
0 |
0 |
-0.035260 |
-1 |
113 |
/randomfile1 |
12 |
1 |
0 |
0 |
0 |
0 |
-0.020447 |
-1 |
114 |
/frand2 |
7 |
1 |
0 |
0 |
0 |
0 |
-0.030701 |
-1 |
115 |
/.bash_history |
14 |
1 |
2 |
0 |
0 |
0 |
-0.022261 |
-1 |
116 |
/.bashrc |
8 |
1 |
1 |
0 |
0 |
0 |
-0.021757 |
-1 |
117 |
/.cache |
7 |
1 |
1 |
0 |
0 |
0 |
-0.021395 |
-1 |
118 |
/.config |
8 |
1 |
1 |
0 |
0 |
0 |
-0.021757 |
-1 |
119 |
/.cvs |
5 |
1 |
1 |
0 |
0 |
0 |
-0.025407 |
-1 |
120 |
/.cvsignore |
11 |
1 |
1 |
0 |
0 |
0 |
-0.012127 |
-1 |
121 |
/.forward |
9 |
1 |
1 |
0 |
0 |
0 |
-0.017994 |
-1 |
123 |
/.history |
9 |
1 |
1 |
0 |
0 |
0 |
-0.017994 |
-1 |
124 |
/.hta |
5 |
1 |
1 |
0 |
0 |
0 |
-0.025407 |
-1 |
125 |
/.hta_ |
6 |
1 |
2 |
0 |
0 |
0 |
-0.036611 |
-1 |
126 |
/.htaccess |
10 |
1 |
1 |
0 |
0 |
0 |
-0.014515 |
-1 |
127 |
/.htaccess_ |
11 |
1 |
2 |
0 |
0 |
0 |
-0.028929 |
-1 |
128 |
/.htpasswd |
10 |
1 |
1 |
0 |
0 |
0 |
-0.014515 |
-1 |
129 |
/.htpasswd_ |
11 |
1 |
2 |
0 |
0 |
0 |
-0.028929 |
-1 |