semnan-university-ai / Internet-Firewall

Internet Firewall Data Data Set

Home Page: http://archive.ics.uci.edu/ml/datasets/Internet+Firewall+Data

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

# Author : Amir Shokri
# github link : https://github.com/amirshnll/Internet-Firewall
# dataset link : http://archive.ics.uci.edu/ml/datasets/Internet+Firewall+Data
# email : amirsh.nll@gmail.com
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


# Load the firewall log dataset from the CSV file in the working directory.
df = pd.read_csv('firewall.csv')
# Bare expression: in a notebook cell this renders the frame as the cell output.
df
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Source Port Destination Port NAT Source Port NAT Destination Port Action Bytes Bytes Sent Bytes Received Packets Elapsed Time (sec) pkts_sent pkts_received
0 57222 53 54587 53 allow 177 94 83 2 30 1 1
1 56258 3389 56258 3389 allow 4768 1600 3168 19 17 10 9
2 6881 50321 43265 50321 allow 238 118 120 2 1199 1 1
3 50553 3389 50553 3389 allow 3327 1438 1889 15 17 8 7
4 50002 443 45848 443 allow 25358 6778 18580 31 16 13 18
... ... ... ... ... ... ... ... ... ... ... ... ...
65527 63691 80 13237 80 allow 314 192 122 6 15 4 2
65528 50964 80 13485 80 allow 4680740 67312 4613428 4675 77 985 3690
65529 54871 445 0 0 drop 70 70 0 1 0 1 0
65530 54870 445 0 0 drop 70 70 0 1 0 1 0
65531 54867 445 0 0 drop 70 70 0 1 0 1 0

65532 rows × 12 columns

# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65532 entries, 0 to 65531
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Source Port           65532 non-null  int64 
 1   Destination Port      65532 non-null  int64 
 2   NAT Source Port       65532 non-null  int64 
 3   NAT Destination Port  65532 non-null  int64 
 4   Action                65532 non-null  object
 5   Bytes                 65532 non-null  int64 
 6   Bytes Sent            65532 non-null  int64 
 7   Bytes Received        65532 non-null  int64 
 8   Packets               65532 non-null  int64 
 9   Elapsed Time (sec)    65532 non-null  int64 
 10  pkts_sent             65532 non-null  int64 
 11  pkts_received         65532 non-null  int64 
dtypes: int64(11), object(1)
memory usage: 6.0+ MB
# Separate the feature columns from the 'Action' target column.
x_data = df.drop(columns=['Action'])

# Target labels as a single-column 2-D array (one row per sample).
y = df['Action'].to_numpy().reshape(-1, 1)

print(x_data)
       Source Port  Destination Port  NAT Source Port  NAT Destination Port  \
0            57222                53            54587                    53   
1            56258              3389            56258                  3389   
2             6881             50321            43265                 50321   
3            50553              3389            50553                  3389   
4            50002               443            45848                   443   
...            ...               ...              ...                   ...   
65527        63691                80            13237                    80   
65528        50964                80            13485                    80   
65529        54871               445                0                     0   
65530        54870               445                0                     0   
65531        54867               445                0                     0   

         Bytes  Bytes Sent  Bytes Received  Packets  Elapsed Time (sec)  \
0          177          94              83        2                  30   
1         4768        1600            3168       19                  17   
2          238         118             120        2                1199   
3         3327        1438            1889       15                  17   
4        25358        6778           18580       31                  16   
...        ...         ...             ...      ...                 ...   
65527      314         192             122        6                  15   
65528  4680740       67312         4613428     4675                  77   
65529       70          70               0        1                   0   
65530       70          70               0        1                   0   
65531       70          70               0        1                   0   

       pkts_sent  pkts_received  
0              1              1  
1             10              9  
2              1              1  
3              8              7  
4             13             18  
...          ...            ...  
65527          4              2  
65528        985           3690  
65529          1              0  
65530          1              0  
65531          1              0  

[65532 rows x 11 columns]
# Bar chart of how many rows carry each 'Action' label.
sns.countplot(x='Action',data=df,palette='hls')
plt.show()

png

#normalize data

# Min-max scale every feature column into [0, 1]: (value - min) / (max - min).
# The previous denominator was max / min, which is infinite whenever a column's
# minimum is 0 and therefore flattened those features to 0.0.
col_min = x_data.min()
col_range = x_data.max() - col_min
# A constant column has zero range; divide by 1 there so it maps to 0, not NaN.
col_range = col_range.replace(0, 1)
x = (x_data - col_min) / col_range
x.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Source Port Destination Port NAT Source Port NAT Destination Port Bytes Bytes Sent Bytes Received Packets Elapsed Time (sec) pkts_sent pkts_received
0 0.0 0.0 0.0 0.0 0.000006 0.000002 0.0 9.651429e-07 0.0 0.000000 0.0
1 0.0 0.0 0.0 0.0 0.000223 0.000097 0.0 1.737257e-05 0.0 0.000012 0.0
2 0.0 0.0 0.0 0.0 0.000008 0.000004 0.0 9.651429e-07 0.0 0.000000 0.0
3 0.0 0.0 0.0 0.0 0.000154 0.000087 0.0 1.351200e-05 0.0 0.000009 0.0
4 0.0 0.0 0.0 0.0 0.001196 0.000425 0.0 2.895429e-05 0.0 0.000016 0.0
# Split features and labels into training and test partitions.
from sklearn.model_selection import train_test_split

# 30% of the rows are held out for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=300
)

# Report the shape of each partition.
print("x_train", x_train.shape)
print("x_test", x_test.shape)
print("y_train", y_train.shape)
print("y_test", y_test.shape)
x_train (45872, 11)
x_test (19660, 11)
y_train (45872, 1)
y_test (19660, 1)
#decision tree classifier

from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn import preprocessing
from sklearn.metrics import accuracy_score

# Fit on the training split only; ravel() passes the labels as a 1-D vector,
# matching how the other classifiers in this file are trained.
dt = DecisionTreeClassifier()
dt = dt.fit(x_train, y_train.ravel())
y_forecast = dt.predict(x_test)

from sklearn.metrics import classification_report
# Reuse the predictions computed above instead of predicting a second time.
print(classification_report(y_test, y_forecast))
print('accuracy:{:.4f}'.format(dt.score(x_test, y_test)))

from sklearn import tree
plt.figure(figsize=(30,30))
# Plot the model that was just evaluated. Refitting on the full data (x, y)
# here would draw a different tree and leave `dt` trained on the test rows.
temp = tree.plot_tree(dt, fontsize=10)
plt.show()
              precision    recall  f1-score   support

       allow       0.99      0.96      0.98     11381
        deny       0.75      0.98      0.85      4456
        drop       0.88      0.67      0.76      3808
  reset-both       0.83      0.33      0.48        15

    accuracy                           0.91     19660
   macro avg       0.87      0.74      0.77     19660
weighted avg       0.92      0.91      0.91     19660

accuracy:0.9074

png

#Naive Bayes Classifier

from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes trained on the training split (labels as a 1-D vector).
nb = GaussianNB()
nb = nb.fit(x_train, y_train.ravel())
y_forecast = nb.predict(x_test)

from sklearn.metrics import classification_report
# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but suppresses the UndefinedMetricWarning; predictions are reused, not recomputed.
print(classification_report(y_test, y_forecast, zero_division=0))
print('Naive Bayes Classifier accuracy:{:.4f}'.format(nb.score(x_test, y_test)))
C:\Users\User\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       allow       1.00      0.36      0.53     11381
        deny       0.11      0.14      0.12      4456
        drop       0.37      1.00      0.54      3808
  reset-both       0.00      0.00      0.00        15

    accuracy                           0.43     19660
   macro avg       0.37      0.37      0.30     19660
weighted avg       0.68      0.43      0.44     19660

Nave Bayes Classifier0.4311
#Logistic Regression Classifier

from sklearn.linear_model import LogisticRegression

# Multiclass logistic regression trained on the training split.
lr = LogisticRegression(solver='lbfgs')
lr = lr.fit(x_train, y_train.ravel())
y_forecast = lr.predict(x_test)


from sklearn.metrics import classification_report
# zero_division=0 keeps the reported 0.0 for classes that are never predicted
# but suppresses the UndefinedMetricWarning; predictions are reused, not recomputed.
print(classification_report(y_test, y_forecast, zero_division=0))
print('accuracy:{:.4f}'.format(lr.score(x_test, y_test)))

C:\Users\User\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       allow       0.58      1.00      0.73     11381
        deny       0.00      0.00      0.00      4456
        drop       0.00      0.00      0.00      3808
  reset-both       0.00      0.00      0.00        15

    accuracy                           0.58     19660
   macro avg       0.14      0.25      0.18     19660
weighted avg       0.34      0.58      0.42     19660

accuracy:0.5789
# k-nearest-neighbours classifier
from sklearn.neighbors import KNeighborsClassifier

# Baseline: a single neighbour, scored on both the test and the training split.
K = 1
knn = KNeighborsClassifier(n_neighbors=K).fit(x_train, y_train.ravel())
baseline_test = knn.score(x_test, y_test)
print("k = {}neighbors , knn test:{}".format(K, baseline_test))
baseline_train = knn.score(x_train, y_train)
print("knn = {}neighbors , knn train:{}".format(K, baseline_train))

# Sweep k over 1..39, recording the accuracy on each split for every k.
ran = np.arange(1,40)
train_list = []
test_list = []
for i, each in enumerate(ran):
    knn = KNeighborsClassifier(n_neighbors=each).fit(x_train, y_train.ravel())
    test_list.append(knn.score(x_test, y_test))
    train_list.append(knn.score(x_train, y_train))

# Position in the list is k - 1, so add 1 to recover the first k that hit the best score.
best_test = np.max(test_list)
best_train = np.max(train_list)
print("best test {} , k={}".format(best_test, test_list.index(best_test)+1))
print("best train {} , k={}".format(best_train, train_list.index(best_train)+1))
k = 1neighbors , knn test:0.8978128179043744
knn = 1neighbors , knn train:0.8984129752354377
#mlp classifier
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier

# Small multilayer perceptron: one hidden layer of 5 units, up to 1000 iterations.
clfm = MLPClassifier(hidden_layer_sizes=(5,), max_iter=1000)
clfm.fit(x_train, y_train.ravel())
y_predm = clfm.predict(x_test)

# Previously this called the misspelled `metrics.accurecy_score` and an
# undefined `clfk`; both now refer to the real function and the fitted `clfm`.
print("ACCURACY:", metrics.accuracy_score(y_test, y_predm))
print(classification_report(y_test, y_predm))
print("mlp:", clfm.score(x_test, y_test))

About

Internet Firewall Data Data Set

http://archive.ics.uci.edu/ml/datasets/Internet+Firewall+Data

License:MIT License


Languages

Language: HTML 63.9%, Jupyter Notebook 35.7%, Python 0.3%