mrmaxent / Maxent

Maximum entropy modelling of species geographic distributions.

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Analyze Maxent output in R and python

Huhu0920 opened this issue · comments

I followed the English tutorial and tutorial data on the website, then analyzed the results generated by MaxEnt in R, and successfully plotted the ROC curve using the code in the tutorial. However, when I used the Python "sklearn" library to calculate model performance metrics such as Accuracy, Recall, Precision, and F1 Score, and to plot the PR curve and compute the area under it, the values I got seem abnormal (F1 and AUC-PR are close to zero even though the ROC AUC is reasonable). Below are my code and results; your help is much appreciated.

Code 1 is:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics

# Running totals accumulated over the 10 MaxEnt cross-validation replicates.
Test_AUC = 0
Test_F1 = 0
AUC_PR = 0

# Lists to store precision-recall values for each iteration
precision_list = []
recall_list = []

for i in range(10):
    # Per-replicate presence-sample predictions written by MaxEnt.
    path_salmpe = f"E:/Sample/Model/1.5/MaEnt1.5/bradypus_{i}_samplePredictions.csv"
    presence = pd.read_csv(path_salmpe)
    pp = presence['Logistic prediction']

    # Per-replicate background-point predictions written by MaxEnt.
    path_background = f"E:/Sample/Model/1.5/MaEnt1.5/bradypus_{i}_backgroundPredictions.csv"
    background = pd.read_csv(path_background)
    bb = background['Logistic']

    # Calculate Test AUC: test presences are the positive class (1),
    # all background points are treated as the negative class (0).
    testpp = pp[presence['Test or train'] == 'test']
    combined = pd.concat([testpp, bb])
    label = [1] * len(testpp) + [0] * len(bb)
    fpr_test, tpr_test, _ = metrics.roc_curve(label, combined)
    auc_test = metrics.auc(fpr_test, tpr_test)
    pd.DataFrame({'x': fpr_test, 'tpr_test': tpr_test}).to_csv(f"tpr4_{i}.csv", index=False)
    pd.DataFrame({'y': tpr_test, 'fpr_test': fpr_test}).to_csv(f"fpr4_{i}.csv", index=False)
    Test_AUC += auc_test

    # Calculate the PR curve for this replicate and store it for later plotting.
    precision, recall, _ = metrics.precision_recall_curve(label, combined)
    precision_list.append(precision)
    recall_list.append(recall)

print(precision_list, recall_list)

Output is:
C:\Users\84455\Desktop\Pythonproject\venv\Scripts\python.exe C:\Users\84455\Desktop\Pythonproject\shiyan.py
[array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ]), array([0.00279218, 0.00279246, 0.00279274, ..., 0. , 0. ,
1. ])] [array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.]), array([1., 1., 1., ..., 0., 0., 0.])]

Code 2 is:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics

# Running totals accumulated over the 10 MaxEnt cross-validation replicates.
Test_AUC = 0
Test_F1 = 0
AUC_PR = 0

# Lists to store precision-recall values for each iteration
precision_list = []
recall_list = []

for i in range(10):
    # Per-replicate presence-sample predictions written by MaxEnt.
    path_salmpe = f"E:/Sample/Model/1.5/MaEnt1.5/bradypus_{i}_samplePredictions.csv"
    presence = pd.read_csv(path_salmpe)
    pp = presence['Logistic prediction']

    # Per-replicate background-point predictions written by MaxEnt.
    path_background = f"E:/Sample/Model/1.5/MaEnt1.5/bradypus_{i}_backgroundPredictions.csv"
    background = pd.read_csv(path_background)
    bb = background['Logistic']

    # Calculate Test AUC: test presences are the positive class (1),
    # all background points are treated as the negative class (0).
    # NOTE(review): this labelling is extremely imbalanced (a few dozen test
    # presences vs ~10,000 background points per replicate), which is why the
    # precision values — and therefore F1 and AUC-PR — come out tiny even
    # though ROC AUC looks fine. This is expected behaviour, not a bug.
    testpp = pp[presence['Test or train'] == 'test']

    print("1111")

    combined = pd.concat([testpp, bb])
    label = [1] * len(testpp) + [0] * len(bb)
    fpr_test, tpr_test, _ = metrics.roc_curve(label, combined)
    auc_test = metrics.auc(fpr_test, tpr_test)
    pd.DataFrame({'x': fpr_test, 'tpr_test': tpr_test}).to_csv(f"tpr4_{i}.csv", index=False)
    pd.DataFrame({'y': tpr_test, 'fpr_test': fpr_test}).to_csv(f"fpr4_{i}.csv", index=False)
    Test_AUC += auc_test

    # Calculate the PR curve for this replicate and store it for later plotting.
    precision, recall, thresholds = metrics.precision_recall_curve(label, combined)
    precision_list.append(precision)
    recall_list.append(recall)
    print(thresholds, len(thresholds), len(recall))

    # Calculate F1 Score.
    # NOTE(review): .round() binarises the logistic output at a hard 0.5
    # threshold; with so few positives almost everything rounds to 0, which
    # drives F1 toward zero. Consider a data-driven threshold instead.
    f1_test = metrics.f1_score(label, combined.round())
    Test_F1 += f1_test

    # Calculate AUC-PR by trapezoidal integration of the PR curve.
    # (metrics.average_precision_score(label, combined) is the usual,
    # slightly more conservative alternative.)
    auc_pr_test = metrics.auc(recall, precision)
    AUC_PR += auc_pr_test

# Calculate averages across the 10 replicates.
average_Test_AUC = Test_AUC / 10
average_Test_F1 = Test_F1 / 10
average_AUC_PR = AUC_PR / 10

# Write averages to CSV.
averages_df = pd.DataFrame({'average_Test_AUC': [average_Test_AUC],
                            'average_Test_F1': [average_Test_F1],
                            'average_AUC_PR': [average_AUC_PR]})
averages_df.to_csv("averages.csv", index=False)

# Plot PR curves for each iteration.
plt.figure()
for i in range(10):
    plt.plot(recall_list[i], precision_list[i], label=f'Iteration {i + 1}')

plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("PR Curves for 10 Iterations")
plt.legend()
plt.show()

print(f"average_Test_AUC: {average_Test_AUC}")
print(f"average_Test_F1: {average_Test_F1}")
print(f"average_AUC_PR: {average_AUC_PR}")

Output is:
C:\Users\84455\Desktop\Pythonproject\venv\Scripts\python.exe C:\Users\84455\Desktop\Pythonproject\模型验证指标.py
[9.84120000e-05 1.69733000e-04 2.80627000e-04 ... 9.56091995e-01
9.59713240e-01 9.63520135e-01] 10020 10021
[5.55250000e-05 9.83350000e-05 1.69482000e-04 ... 9.58563149e-01
9.60605718e-01 9.64024315e-01] 10020 10021
[1.01890000e-05 2.00410000e-05 2.00620000e-05 ... 9.48663581e-01
9.49878971e-01 9.52650268e-01] 10020 10021
[6.71490000e-05 1.18648000e-04 1.32370000e-04 ... 9.56108233e-01
9.59179799e-01 9.60998757e-01] 10020 10021
[8.38520000e-05 1.39355000e-04 1.41382000e-04 ... 9.53893315e-01
9.54867463e-01 9.55556774e-01] 10020 10021
[8.20600000e-06 1.78000000e-05 2.28010000e-05 ... 9.54753610e-01
9.58550968e-01 9.59193438e-01] 10019 10020
[8.71630000e-05 1.44540000e-04 1.67089000e-04 ... 9.51542110e-01
9.54680135e-01 9.55923510e-01] 10020 10021
[6.13810000e-05 9.57040000e-05 1.01624000e-04 ... 9.56951969e-01
9.57777237e-01 9.57936305e-01] 10015 10016
[1.15933000e-04 1.98368000e-04 2.79514000e-04 ... 9.53967091e-01
9.57507655e-01 9.57608380e-01] 10020 10021
[4.45110000e-05 8.07610000e-05 1.17884000e-04 ... 9.60026378e-01
9.61561001e-01 9.62771068e-01] 10020 10021
average_Test_AUC: 0.8499257142857143
average_Test_F1: 0.029895908369968943
average_AUC_PR: 0.03254011072180415