x00itachi / isolationforest-exp-test

Exploring the Isolation Forest model on HTTP request URIs to detect anomalies

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn import preprocessing
import string
from urllib.parse import urlparse, parse_qs
# Comma-separated column names for the capture CSV, which is read without a
# header row; the names are assigned to the file's columns positionally.
columns_str = 'frame.number,frame.len,frame.time,frame.time_epoch,frame.protocols,eth.src,eth.dst,eth.type,ip.src,ip.dst,ip.len,ip.ttl,ip.flags,ip.frag_offset,ip.proto,ip.version,ip.dsfield,ip.checksum,tcp.srcport,tcp.dstport,tcp.len,tcp.seq,tcp.ack,tcp.flags,tcp.flags.syn,tcp.flags.ack,tcp.flags.fin,tcp.flags.reset,tcp.window_size,tcp.checksum,tcp.stream,udp.srcport,udp.dstport,udp.length,udp.checksum,icmp.type,icmp.code,icmp.checksum,http.request.method,http.request.uri,http.request.version,http.request.full_uri,http.response.code,http.user_agent,http.content_length_header,http.content_type,http.cookie,http.host,http.referer,http.location,http.authorization,http.connection,dns.qry.name,dns.qry.type,dns.qry.class,dns.flags.response,dns.flags.recdesired,dns.flags.rcode,dns.resp.ttl,dns.resp.len,smtp.req.command,smtp.data.fragment,pop.request.command,pop.response,imap.request.command,imap.response,ftp.request.command,ftp.request.arg,ftp.response.code,ftp.response.arg,ipv6.src,ipv6.dst,ipv6.plen,alert'
# One list entry per column name, in the same order as the string above.
data_columns = columns_str.split(',')
# Bare expression: displays the list when run as a notebook cell.
data_columns
['frame.number',
 'frame.len',
 'frame.time',
 'frame.time_epoch',
 'frame.protocols',
 'eth.src',
 'eth.dst',
 'eth.type',
 'ip.src',
 'ip.dst',
 'ip.len',
 'ip.ttl',
 'ip.flags',
 'ip.frag_offset',
 'ip.proto',
 'ip.version',
 'ip.dsfield',
 'ip.checksum',
 'tcp.srcport',
 'tcp.dstport',
 'tcp.len',
 'tcp.seq',
 'tcp.ack',
 'tcp.flags',
 'tcp.flags.syn',
 'tcp.flags.ack',
 'tcp.flags.fin',
 'tcp.flags.reset',
 'tcp.window_size',
 'tcp.checksum',
 'tcp.stream',
 'udp.srcport',
 'udp.dstport',
 'udp.length',
 'udp.checksum',
 'icmp.type',
 'icmp.code',
 'icmp.checksum',
 'http.request.method',
 'http.request.uri',
 'http.request.version',
 'http.request.full_uri',
 'http.response.code',
 'http.user_agent',
 'http.content_length_header',
 'http.content_type',
 'http.cookie',
 'http.host',
 'http.referer',
 'http.location',
 'http.authorization',
 'http.connection',
 'dns.qry.name',
 'dns.qry.type',
 'dns.qry.class',
 'dns.flags.response',
 'dns.flags.recdesired',
 'dns.flags.rcode',
 'dns.resp.ttl',
 'dns.resp.len',
 'smtp.req.command',
 'smtp.data.fragment',
 'pop.request.command',
 'pop.response',
 'imap.request.command',
 'imap.response',
 'ftp.request.command',
 'ftp.request.arg',
 'ftp.response.code',
 'ftp.response.arg',
 'ipv6.src',
 'ipv6.dst',
 'ipv6.plen',
 'alert']
# Load only the request-URI column from the headerless capture CSV.
# `names` labels every column positionally so `usecols` can select by name.
df = pd.read_csv(
    'attack-simulation-http-url.csv',
    header=None,
    names=data_columns,
    usecols=['http.request.uri'],
    low_memory=False,
)
df.head(n=10)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri
0 /dvwa/config/
1 /dvwa/docs/
2 /dvwa/external/
3 /mutillidae/ajax/
4 /mutillidae/classes/
5 /mutillidae/data/
6 /mutillidae/documentation/
7 /mutillidae/images/
8 /mutillidae/includes/
9 /mutillidae/javascript/
#Preprocessing

def uri_len(uri):
    """Return the number of characters in the string form of ``uri``.

    The value is converted with ``str()`` before measuring, so non-string
    inputs are measured by their text representation (for example a
    missing value ``nan`` is measured as the 3-character string ``"nan"``).
    """
    return len(str(uri))

# Feature: character length of each request URI.
df['uri_len'] = df['http.request.uri'].map(uri_len)
df.head(n=10)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len
0 /dvwa/config/ 13
1 /dvwa/docs/ 11
2 /dvwa/external/ 15
3 /mutillidae/ajax/ 17
4 /mutillidae/classes/ 20
5 /mutillidae/data/ 17
6 /mutillidae/documentation/ 26
7 /mutillidae/images/ 19
8 /mutillidae/includes/ 21
9 /mutillidae/javascript/ 23
def count_uri_segments(uri):
    """Return the number of '/'-separated segments in the URI's path.

    Only the path component is considered (query string and fragment are
    excluded by ``urlparse``). Leading and trailing slashes are removed
    before splitting, so ``/a/b/`` and ``a/b`` both count as 2.

    An empty path (``""`` or the root ``"/"``) has 0 segments. Without the
    explicit check, ``"".split("/")`` yields ``[""]`` and the root URI
    would be reported as 1 segment, the same as ``/assets/``.

    Empty segments in the middle of a path (``/a//b``) are still counted.
    """
    path = urlparse(str(uri)).path.strip('/')  # Remove leading and trailing slashes
    if not path:
        return 0
    return len(path.split('/'))

# Feature: number of path segments in each request URI.
df['uri_segs'] = df['http.request.uri'].map(count_uri_segments)
df.head(n=10)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len uri_segs
0 /dvwa/config/ 13 2
1 /dvwa/docs/ 11 2
2 /dvwa/external/ 15 2
3 /mutillidae/ajax/ 17 2
4 /mutillidae/classes/ 20 2
5 /mutillidae/data/ 17 2
6 /mutillidae/documentation/ 26 2
7 /mutillidae/images/ 19 2
8 /mutillidae/includes/ 21 2
9 /mutillidae/javascript/ 23 2
# Characters that are NOT counted as special: ASCII letters, digits and '/'.
# Built once as a set so each membership test is O(1) instead of rebuilding
# and scanning a concatenated string for every character of every URI.
_URI_PLAIN_CHARS = frozenset(string.ascii_letters + string.digits + '/')


def count_special_chars(uri):
    """Return how many characters of ``uri`` are not ASCII alphanumerics or '/'.

    The input is converted with ``str()`` first. Every character of the
    whole string is examined (path, query and fragment alike), so
    punctuation such as ``?``, ``=``, ``&``, ``%``, ``.``, ``-`` and ``_``
    is counted, as is any non-ASCII character.
    """
    return sum(1 for char in str(uri) if char not in _URI_PLAIN_CHARS)

# Feature: count of characters other than ASCII letters, digits and '/'.
df['uri_s_chars'] = df['http.request.uri'].map(count_special_chars)
df.head(n=30)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len uri_segs uri_s_chars
0 /dvwa/config/ 13 2 0
1 /dvwa/docs/ 11 2 0
2 /dvwa/external/ 15 2 0
3 /mutillidae/ajax/ 17 2 0
4 /mutillidae/classes/ 20 2 0
5 /mutillidae/data/ 17 2 0
6 /mutillidae/documentation/ 26 2 0
7 /mutillidae/images/ 19 2 0
8 /mutillidae/includes/ 21 2 0
9 /mutillidae/javascript/ 23 2 0
10 /mutillidae/passwords/ 22 2 0
11 /mutillidae/phpmyadmin/ 23 2 0
12 /mutillidae/styles/ 19 2 0
13 /mutillidae/test/ 17 2 0
14 /mutillidae/webservices/ 24 2 0
15 /mutillidae/phpmyadmin/examples/ 32 3 0
16 /mutillidae/phpmyadmin/js/ 26 3 0
17 /mutillidae/phpmyadmin/locale/ 30 3 0
18 /mutillidae/phpmyadmin/setup/ 29 3 0
19 /mutillidae/phpmyadmin/themes/ 30 3 0
20 /assets/ 8 1 0
21 /cgi-bin/ 9 1 1
22 /evil/ 6 1 0
23 /gallery2/ 10 1 0
24 NaN 3 1 0
25 /gallery2/main.php 18 2 1
26 /icon/ 6 1 0
27 /images/ 8 1 0
28 /javascript/ 12 1 0
29 /joomla/ 8 1 0
def check_reserved_characters(uri):
    """Return the number of reserved characters in the URI's path.

    Only the path component returned by ``urlparse`` is inspected, after
    leading and trailing slashes are stripped; characters in the query
    string or fragment are not counted. Because '/' is itself in the
    reserved set, every interior slash of the path contributes 1.
    """
    reserved = set("!*'();:@&=+$,/?#[]")
    trimmed_path = urlparse(str(uri)).path.strip('/')
    count = 0
    for ch in trimmed_path:
        if ch in reserved:
            count += 1
    return count

# Feature: count of reserved characters inside the URI path.
df['uri_res_chars'] = df['http.request.uri'].map(check_reserved_characters)
df.head(n=30)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len uri_segs uri_s_chars uri_res_chars
0 /dvwa/config/ 13 2 0 1
1 /dvwa/docs/ 11 2 0 1
2 /dvwa/external/ 15 2 0 1
3 /mutillidae/ajax/ 17 2 0 1
4 /mutillidae/classes/ 20 2 0 1
5 /mutillidae/data/ 17 2 0 1
6 /mutillidae/documentation/ 26 2 0 1
7 /mutillidae/images/ 19 2 0 1
8 /mutillidae/includes/ 21 2 0 1
9 /mutillidae/javascript/ 23 2 0 1
10 /mutillidae/passwords/ 22 2 0 1
11 /mutillidae/phpmyadmin/ 23 2 0 1
12 /mutillidae/styles/ 19 2 0 1
13 /mutillidae/test/ 17 2 0 1
14 /mutillidae/webservices/ 24 2 0 1
15 /mutillidae/phpmyadmin/examples/ 32 3 0 2
16 /mutillidae/phpmyadmin/js/ 26 3 0 2
17 /mutillidae/phpmyadmin/locale/ 30 3 0 2
18 /mutillidae/phpmyadmin/setup/ 29 3 0 2
19 /mutillidae/phpmyadmin/themes/ 30 3 0 2
20 /assets/ 8 1 0 0
21 /cgi-bin/ 9 1 1 0
22 /evil/ 6 1 0 0
23 /gallery2/ 10 1 0 0
24 NaN 3 1 0 0
25 /gallery2/main.php 18 2 1 1
26 /icon/ 6 1 0 0
27 /images/ 8 1 0 0
28 /javascript/ 12 1 0 0
29 /joomla/ 8 1 0 0
def count_query_parameters(uri):
    """Return the number of distinct query-parameter names in ``uri``.

    The query string is parsed with ``parse_qs`` using its defaults, so a
    name that appears several times is counted once, and parameters whose
    value is blank (``?x=``) are dropped and therefore not counted.
    """
    query_string = urlparse(str(uri)).query
    return len(parse_qs(query_string))

# Feature: number of distinct query parameters in each request URI.
df['uri_q_params'] = df['http.request.uri'].map(count_query_parameters)
df.tail(n=30)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len uri_segs uri_s_chars uri_res_chars uri_q_params
1964477 /awstats/awstats.pl?config=owaspbwa&framename=... 139 2 18 1 5
1964478 /awstats/awstats.pl?config=owaspbwa&framename=... 128 2 17 1 6
1964479 /awstats/awstats.pl?config=owaspbwa&framename=... 123 2 17 1 5
1964480 /awstats/awstats.pl?config=owaspbwa&framename=... 120 2 15 1 6
1964481 /awstats/awstats.pl?config=owaspbwa&framename=... 128 2 18 1 5
1964482 /awstats/awstats.pl?config=owaspbwa&framename=... 149 2 19 1 6
1964483 /awstats/awstats.pl?config=owaspbwa&framename=... 120 2 16 1 5
1964484 /awstats/awstats.pl?config=owaspbwa&framename=... 154 2 20 1 6
1964485 /awstats/awstats.pl?config=owaspbwa&framename=... 149 2 20 1 5
1964486 /awstats/awstats.pl?config=owaspbwa&framename=... 136 2 19 1 6
1964487 /awstats/awstats.pl?config=owaspbwa&framename=... 154 2 21 1 5
1964488 /awstats/awstats.pl?config=owaspbwa&framename=... 141 2 20 1 6
1964489 /awstats/awstats.pl?config=owaspbwa&framename=... 110 2 14 1 5
1964490 /awstats/awstats.pl?config=owaspbwa&framename=... 133 2 18 1 6
1964491 /awstats/awstats.pl?config=owaspbwa&framename=... 115 2 15 1 5
1964492 /awstats/awstats.pl?config=owaspbwa&framename=... 162 2 22 1 6
1964493 /awstats/awstats.pl?config=owaspbwa&framename=... 107 2 13 1 5
1964494 /awstats/awstats.pl?config=owaspbwa&framename=... 167 2 23 1 6
1964495 /awstats/awstats.pl?config=owaspbwa&framename=... 136 2 17 1 5
1964496 /awstats/awstats.pl?config=owaspbwa&framename=... 123 2 16 1 6
1964497 /awstats/awstats.pl?config=owaspbwa&framename=... 141 2 18 1 5
1964498 /awstats/awstats.pl?config=owaspbwa&framename=... 128 2 17 1 6
1964499 /awstats/awstats.pl?config=owaspbwa&framename=... 127 2 17 1 5
1964500 /awstats/awstats.pl?config=owaspbwa&framename=... 120 2 15 1 6
1964501 /awstats/awstats.pl?config=owaspbwa&framename=... 132 2 18 1 5
1964502 /awstats/awstats.pl?config=owaspbwa&framename=... 149 2 19 1 6
1964503 /awstats/awstats.pl?config=owaspbwa&framename=... 124 2 16 1 5
1964504 /awstats/awstats.pl?config=owaspbwa&framename=... 154 2 20 1 6
1964505 /awstats/awstats.pl?config=owaspbwa&framename=... 153 2 20 1 5
1964506 /awstats/awstats.pl?config=owaspbwa&framename=... 158 2 21 1 5
def max_query_param_length(uri):
    """Return the length of the longest query-parameter value in ``uri``.

    The query string is parsed with ``parse_qs``, so lengths are measured
    on the percent-decoded values and every value of a repeated parameter
    is considered. Returns 0 when the URI has no query parameters (or only
    blank ones, which ``parse_qs`` drops by default).
    """
    parsed_query_params = parse_qs(urlparse(str(uri)).query)
    # Only the values matter here; `default=0` covers the no-parameter case.
    return max(
        (len(value)
         for values in parsed_query_params.values()
         for value in values),
        default=0,
    )

# Feature: length of the longest query-parameter value in each request URI.
df['uri_maxq_len'] = df['http.request.uri'].map(max_query_param_length)
df.tail(n=30)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len uri_segs uri_s_chars uri_res_chars uri_q_params uri_maxq_len
1964477 /awstats/awstats.pl?config=owaspbwa&framename=... 139 2 18 1 5 38
1964478 /awstats/awstats.pl?config=owaspbwa&framename=... 128 2 17 1 6 18
1964479 /awstats/awstats.pl?config=owaspbwa&framename=... 123 2 17 1 5 22
1964480 /awstats/awstats.pl?config=owaspbwa&framename=... 120 2 15 1 6 14
1964481 /awstats/awstats.pl?config=owaspbwa&framename=... 128 2 18 1 5 25
1964482 /awstats/awstats.pl?config=owaspbwa&framename=... 149 2 19 1 6 35
1964483 /awstats/awstats.pl?config=owaspbwa&framename=... 120 2 16 1 5 21
1964484 /awstats/awstats.pl?config=owaspbwa&framename=... 154 2 20 1 6 38
1964485 /awstats/awstats.pl?config=owaspbwa&framename=... 149 2 20 1 5 42
1964486 /awstats/awstats.pl?config=owaspbwa&framename=... 136 2 19 1 6 22
1964487 /awstats/awstats.pl?config=owaspbwa&framename=... 154 2 21 1 5 45
1964488 /awstats/awstats.pl?config=owaspbwa&framename=... 141 2 20 1 6 25
1964489 /awstats/awstats.pl?config=owaspbwa&framename=... 110 2 14 1 5 15
1964490 /awstats/awstats.pl?config=owaspbwa&framename=... 133 2 18 1 6 21
1964491 /awstats/awstats.pl?config=owaspbwa&framename=... 115 2 15 1 5 18
1964492 /awstats/awstats.pl?config=owaspbwa&framename=... 162 2 22 1 6 42
1964493 /awstats/awstats.pl?config=owaspbwa&framename=... 107 2 13 1 5 14
1964494 /awstats/awstats.pl?config=owaspbwa&framename=... 167 2 23 1 6 45
1964495 /awstats/awstats.pl?config=owaspbwa&framename=... 136 2 17 1 5 35
1964496 /awstats/awstats.pl?config=owaspbwa&framename=... 123 2 16 1 6 15
1964497 /awstats/awstats.pl?config=owaspbwa&framename=... 141 2 18 1 5 38
1964498 /awstats/awstats.pl?config=owaspbwa&framename=... 128 2 17 1 6 18
1964499 /awstats/awstats.pl?config=owaspbwa&framename=... 127 2 17 1 5 22
1964500 /awstats/awstats.pl?config=owaspbwa&framename=... 120 2 15 1 6 14
1964501 /awstats/awstats.pl?config=owaspbwa&framename=... 132 2 18 1 5 25
1964502 /awstats/awstats.pl?config=owaspbwa&framename=... 149 2 19 1 6 35
1964503 /awstats/awstats.pl?config=owaspbwa&framename=... 124 2 16 1 5 21
1964504 /awstats/awstats.pl?config=owaspbwa&framename=... 154 2 20 1 6 38
1964505 /awstats/awstats.pl?config=owaspbwa&framename=... 153 2 20 1 5 42
1964506 /awstats/awstats.pl?config=owaspbwa&framename=... 158 2 21 1 5 45
# Remove every row that contains a missing value in any column, modifying
# `df` in place. The derived feature columns are always integers, so in
# practice this drops the rows whose request URI is missing.
df.dropna(axis=0, how='any', inplace=True)
df
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len uri_segs uri_s_chars uri_res_chars uri_q_params uri_maxq_len
0 /dvwa/config/ 13 2 0 1 0 0
1 /dvwa/docs/ 11 2 0 1 0 0
2 /dvwa/external/ 15 2 0 1 0 0
3 /mutillidae/ajax/ 17 2 0 1 0 0
4 /mutillidae/classes/ 20 2 0 1 0 0
... ... ... ... ... ... ... ...
1964502 /awstats/awstats.pl?config=owaspbwa&framename=... 149 2 19 1 6 35
1964503 /awstats/awstats.pl?config=owaspbwa&framename=... 124 2 16 1 5 21
1964504 /awstats/awstats.pl?config=owaspbwa&framename=... 154 2 20 1 6 38
1964505 /awstats/awstats.pl?config=owaspbwa&framename=... 153 2 20 1 5 42
1964506 /awstats/awstats.pl?config=owaspbwa&framename=... 158 2 21 1 5 45

1914976 rows × 7 columns

# Create an Isolation Forest model.
# contamination=0.1 sets the expected proportion of outliers, which fixes the
# score threshold used to label samples.
# random_state pins the random tree construction so that scores and labels
# are reproducible from one run to the next.
model = IsolationForest(contamination=0.1, random_state=42)
model
<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: scikit-learn/scikit-learn#21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style>
IsolationForest(contamination=0.1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
IsolationForest
IsolationForest(contamination=0.1)
# Fit the model on the numeric URI features only; the raw URI string column
# is left out of the training matrix.
feature_columns = [
    'uri_len',
    'uri_segs',
    'uri_s_chars',
    'uri_res_chars',
    'uri_q_params',
    'uri_maxq_len',
]
required_df = df[feature_columns]
model.fit(required_df)
<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: scikit-learn/scikit-learn#21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style>
IsolationForest(contamination=0.1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
IsolationForest
IsolationForest(contamination=0.1)
# Score and label every row with the fitted model.
# `scores` holds the decision-function value (negative values fall on the
# outlier side of the threshold); `anomaly` holds the predicted label,
# -1 for outliers and 1 for inliers.
row_scores = model.decision_function(required_df)
row_labels = model.predict(required_df)
df['scores'] = row_scores
df['anomaly'] = row_labels
df.head(n=20)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len uri_segs uri_s_chars uri_res_chars uri_q_params uri_maxq_len scores anomaly
0 /dvwa/config/ 13 2 0 1 0 0 0.090492 1
1 /dvwa/docs/ 11 2 0 1 0 0 0.087122 1
2 /dvwa/external/ 15 2 0 1 0 0 0.096042 1
3 /mutillidae/ajax/ 17 2 0 1 0 0 0.100049 1
4 /mutillidae/classes/ 20 2 0 1 0 0 0.102356 1
5 /mutillidae/data/ 17 2 0 1 0 0 0.100049 1
6 /mutillidae/documentation/ 26 2 0 1 0 0 0.102650 1
7 /mutillidae/images/ 19 2 0 1 0 0 0.101738 1
8 /mutillidae/includes/ 21 2 0 1 0 0 0.104347 1
9 /mutillidae/javascript/ 23 2 0 1 0 0 0.107356 1
10 /mutillidae/passwords/ 22 2 0 1 0 0 0.106141 1
11 /mutillidae/phpmyadmin/ 23 2 0 1 0 0 0.107356 1
12 /mutillidae/styles/ 19 2 0 1 0 0 0.101738 1
13 /mutillidae/test/ 17 2 0 1 0 0 0.100049 1
14 /mutillidae/webservices/ 24 2 0 1 0 0 0.104205 1
15 /mutillidae/phpmyadmin/examples/ 32 3 0 2 0 0 0.138496 1
16 /mutillidae/phpmyadmin/js/ 26 3 0 2 0 0 0.145323 1
17 /mutillidae/phpmyadmin/locale/ 30 3 0 2 0 0 0.140898 1
18 /mutillidae/phpmyadmin/setup/ 29 3 0 2 0 0 0.141864 1
19 /mutillidae/phpmyadmin/themes/ 30 3 0 2 0 0 0.140898 1
# Keep only the rows the model labelled as outliers (-1).
is_outlier = df['anomaly'] == -1
anomaly = df[is_outlier]
anomaly.head(n=50)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
http.request.uri uri_len uri_segs uri_s_chars uri_res_chars uri_q_params uri_maxq_len scores anomaly
20 /assets/ 8 1 0 0 0 0 -0.031069 -1
21 /cgi-bin/ 9 1 1 0 0 0 -0.017994 -1
22 /evil/ 6 1 0 0 0 0 -0.034518 -1
23 /gallery2/ 10 1 0 0 0 0 -0.023963 -1
26 /icon/ 6 1 0 0 0 0 -0.034518 -1
27 /images/ 8 1 0 0 0 0 -0.031069 -1
28 /javascript/ 12 1 0 0 0 0 -0.020447 -1
29 /joomla/ 8 1 0 0 0 0 -0.031069 -1
30 /phpBB2/ 8 1 0 0 0 0 -0.031069 -1
31 /phpmyadmin/ 12 1 0 0 0 0 -0.020447 -1
32 /test/ 6 1 0 0 0 0 -0.034518 -1
33 /test/ 6 1 0 0 0 0 -0.034518 -1
34 /wordpress/ 11 1 0 0 0 0 -0.021533 -1
54 / 1 1 0 0 0 0 -0.035260 -1
55 / 1 1 0 0 0 0 -0.035260 -1
56 / 1 1 0 0 0 0 -0.035260 -1
59 / 1 1 0 0 0 0 -0.035260 -1
61 / 1 1 0 0 0 0 -0.035260 -1
66 /phpBB2/login.php?redirect=admin/&amp;sid=eb3e... 74 2 6 1 2 32 -0.164158 -1
80 /nmaplowercheck1685892421 25 1 0 0 0 0 -0.005327 -1
81 / 1 1 0 0 0 0 -0.035260 -1
82 /nmaplowercheck1685892421 25 1 0 0 0 0 -0.005327 -1
83 /nmaplowercheck1685892421 25 1 0 0 0 0 -0.005327 -1
84 / 1 1 0 0 0 0 -0.035260 -1
85 / 1 1 0 0 0 0 -0.035260 -1
91 /HNAP1 6 1 0 0 0 0 -0.034518 -1
92 /HNAP1 6 1 0 0 0 0 -0.034518 -1
95 /HNAP1 6 1 0 0 0 0 -0.034518 -1
99 /wordpress/wp-login.php?redirect_to=%2Fwordpre... 62 2 9 1 1 20 -0.110149 -1
102 / 1 1 0 0 0 0 -0.035260 -1
103 / 1 1 0 0 0 0 -0.035260 -1
104 / 1 1 0 0 0 0 -0.035260 -1
105 / 1 1 0 0 0 0 -0.035260 -1
106 / 1 1 0 0 0 0 -0.035260 -1
113 /randomfile1 12 1 0 0 0 0 -0.020447 -1
114 /frand2 7 1 0 0 0 0 -0.030701 -1
115 /.bash_history 14 1 2 0 0 0 -0.022261 -1
116 /.bashrc 8 1 1 0 0 0 -0.021757 -1
117 /.cache 7 1 1 0 0 0 -0.021395 -1
118 /.config 8 1 1 0 0 0 -0.021757 -1
119 /.cvs 5 1 1 0 0 0 -0.025407 -1
120 /.cvsignore 11 1 1 0 0 0 -0.012127 -1
121 /.forward 9 1 1 0 0 0 -0.017994 -1
123 /.history 9 1 1 0 0 0 -0.017994 -1
124 /.hta 5 1 1 0 0 0 -0.025407 -1
125 /.hta_ 6 1 2 0 0 0 -0.036611 -1
126 /.htaccess 10 1 1 0 0 0 -0.014515 -1
127 /.htaccess_ 11 1 2 0 0 0 -0.028929 -1
128 /.htpasswd 10 1 1 0 0 0 -0.014515 -1
129 /.htpasswd_ 11 1 2 0 0 0 -0.028929 -1

About

Exploring the Isolation Forest model on HTTP request URIs to detect anomalies


Languages

Language:Jupyter Notebook 100.0%