Marshhhhh / first

Marshhhhh opened this issue · comments

Yesterday I tried to imagine an unreal situation: what would be different in my life if I were of the opposite sex? Hmm ... I would be a strong blond man, because I would do sports training. I would also definitely have a driver's license, which I don't have now. And I would want to know how a gun works, why a plane flies and how a rocket takes off. I think men have a more constructive way of thinking.
As for my profession, I would change my specialty a little and learn programming.
An additional point: I would have to serve in the military, and I would do it.
What else? I don't think I would have any new interests in my life.
I would still study mathematical and economic sciences, as I do now. And I would still want to visit different countries and explore the differences between cultures.
Because I would have the same genes, the same parents and the same life as now.

#1 Logit bd1 cat
library(ROCR)            # prediction()/performance() are used throughout for ROC curves

d_t <- train.cat1[,c]    # 'c' is assumed to hold the selected feature columns, defined earlier
mylogit1 <- step(glm(bd1 ~ .,
                     data = d_t[,-c(1,14)],
                     family = binomial(link = "logit")),
                 direction = "backward", trace = FALSE)

testp1 <- round(predict(mylogit1, type = "response", newdata = test.cat1), 6)
pred1  <- prediction(testp1, test.cat1$bd1)
perf1  <- performance(pred1, "tpr", "fpr")
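
Before comparing ROC curves it can help to see which predictors survived the backward elimination and what the hold-out AUC is. A minimal sketch using only the objects above plus ROCR (the name auc_l1 is illustrative):

# Final formula and coefficients kept by step()
formula(mylogit1)
summary(mylogit1)

# Hold-out AUC and the corresponding Gini coefficient
auc_l1 <- unlist(slot(performance(pred1, "auc"), "y.values"))
(auc_l1 - 0.5) * 2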

#2 RandomForest bd1

library(randomForest)

set.seed(124)
d_t <- train[,c]
d_t <- d_t[,-c(13)]                         # drop the other target column
d_t$bd1 <- as.factor(d_t$bd1)

RF <- function(d_t, size = 10000, ntree = 300){

  d_t$bd1 <- as.factor(d_t$bd1)

  # subsample 'size' credits for training
  s1  <- sample(d_t$id_credit, replace = FALSE, size = size)
  ds1 <- d_t[d_t$id_credit %in% s1,]

  # share of the first class in the subsample (for reference only)
  proc <- summary(ds1$bd1)[1]/nrow(ds1)

  arf1 <- randomForest(bd1 ~ .,
                       data = ds1,
                       # replace = FALSE,
                       ntree = ntree,
                       importance = TRUE,
                       proximity = TRUE,
                       mtry = 3,
                       type = "classification")

  # probability of the positive class on the test set
  pred <- predict(arf1, test, type = 'prob')[,2]
  return(pred)
}

A1 <- RF(d_t)

MA <- as.data.frame(test[,"id_credit"])
MA$A9 <- A1

# average over the model columns (only one here), keeping the id column out
testp5 <- rowMeans(MA[,-1, drop = FALSE])
pred5  <- prediction(testp5, test$bd1)
perf5  <- performance(pred5, "tpr", "fpr")
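
RF() returns only the test-set probabilities, so the fitted forest itself is discarded. A minimal sketch for looking at variable importance, refitting one forest outside the function on the same kind of subsample (the names s1_chk and rf_chk are illustrative):

# One forest fitted outside RF(), only to inspect variable importance
s1_chk <- sample(d_t$id_credit, replace = FALSE, size = 10000)
rf_chk <- randomForest(bd1 ~ ., data = d_t[d_t$id_credit %in% s1_chk,],
                       ntree = 300, mtry = 3, importance = TRUE)
importance(rf_chk)   # mean decrease in accuracy / Gini per predictor
varImpPlot(rf_chk)   # the same table, plotted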

#2 RandomForest bd3

set.seed(124)
d_t <- train[,c]
d_t <- d_t[,-c(12)]                         # drop the other target column
d_t$bd3 <- as.factor(d_t$bd3)

names(d_t)                                  # sanity check on the remaining columns

RF <- function(d_t, size = 10000, ntree = 300){

  d_t$bd3 <- as.factor(d_t$bd3)

  s1  <- sample(d_t$id_credit, replace = FALSE, size = size)
  ds1 <- d_t[d_t$id_credit %in% s1,]

  # share of the first class in the subsample (for reference only)
  proc <- summary(ds1$bd3)[1]/nrow(ds1)

  arf1 <- randomForest(bd3 ~ .,
                       data = ds1,
                       # replace = FALSE,
                       ntree = ntree,
                       importance = TRUE,
                       proximity = TRUE,
                       mtry = 3,
                       type = "classification")

  pred <- predict(arf1, test, type = 'prob')[,2]
  return(pred)
}

A1 <- RF(d_t)

MAA <- as.data.frame(test[,"id_credit"])
MAA$A10 <- A1

testp6 <- rowMeans(MAA[,-1, drop = FALSE])
pred6  <- prediction(testp6, test$bd3)
perf6  <- performance(pred6, "tpr", "fpr")

CTree bd1

# conditional inference trees correct for known biases in CHAID and CART
library(party)
set.seed(124)
d_t <- train[,c]
d_t <- d_t[,-c(13)]                         # drop the other target column

d_t$bd1 <- as.factor(d_t$bd1)

cfit1 <- ctree(bd1 ~ ., data = d_t)
plot(cfit1)

# treeresponse() returns one probability vector per test row; bind them into a matrix
resultdfr <- as.data.frame(do.call("rbind", treeresponse(cfit1, newdata = test)))

testp9 <- resultdfr[,1]                     # probability of the first factor level
pred9  <- prediction(testp9, test$bd1)
perf9  <- performance(pred9, "tpr", "fpr")

CTree bd3

# conditional inference trees correct for known biases in CHAID and CART
library(party)
set.seed(124)
d_t <- train[,c]
d_t <- d_t[,-c(12)]                         # drop the other target column

d_t$bd3 <- as.factor(d_t$bd3)

cfit2 <- ctree(bd3 ~ ., data = d_t)
plot(cfit2)

resultdfr <- as.data.frame(do.call("rbind", treeresponse(cfit2, newdata = test)))

testp8 <- resultdfr[,1]                     # probability of the first factor level
pred8  <- prediction(testp8, test$bd3)
perf8  <- performance(pred8, "tpr", "fpr")

drop(predict(cfit1, newdata = test))        # class predictions from the bd1 tree

#11 Logit bd3 cat
d_t <- train.cat3[,c]
d_t$bd3 <- ifelse(d_t$bd3 == 1, 0, 1)       # recode the target: swap the classes
mylogit11 <- step(glm(bd3 ~ .,
                      data = d_t[,-c(1,13)],
                      family = binomial(link = "logit")),
                  direction = "backward", trace = FALSE)

testp11 <- round(predict(mylogit11, type = "response", newdata = test.cat3), 6)
pred11  <- prediction(testp11, test.cat3$bd3)
perf11  <- performance(pred11, "tpr", "fpr")

SVM

library("e1071")
set.seed(124)
d_t <- train[,c]
d_t <- d_t[,-c(13)]                         # drop the other target column
d_t$bd1 <- as.factor(d_t$bd1)

sv <- svm(bd1 ~ .,
          data = d_t[,-1],
          kernel = "linear",
          cost = 10,
          scale = FALSE)
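
The SVM above is fitted but never scored. A minimal sketch for adding it to the ROC comparison, using e1071's decision values on the same test split (the names svp, pred_sv and perf_sv are illustrative):

# Signed distances to the separating hyperplane on the test set
svp <- attributes(predict(sv, newdata = test, decision.values = TRUE))$decision.values

pred_sv <- prediction(as.numeric(svp), test$bd1)
perf_sv <- performance(pred_sv, "tpr", "fpr")
plot(perf_sv, col = "purple", lty = 1, lwd = 2, add = TRUE)   # run after the base ROC plot below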

Mean all

# Two ensembles: bd3 models (first line) and bd1 models (second line).
# Note that the second assignment overwrites the first; keep only the one you need.
testp0 <- apply(cbind(testp11, testp6, testp8), 1, mean)
testp0 <- apply(cbind(testp1, testp5, testp9), 1, mean)
pred0  <- prediction(testp0, test$bd1)      # bd1 labels from the test split, matching the active ensemble
perf0  <- performance(pred0, "tpr", "fpr")

#plotting logistic results vs. random forest ROC

library("ROCR")
par(font = 6, font.lab = 6, font.main = 6, cex.lab = 1, cex.axis = 1)
plot(perf1, col = "darkorange", lty = 1, lwd = 3, type = "l",
     xlab = "1 - Specificity", ylab = "Sensitivity",
     main = list("ROC curve", cex = 1, font = 1))
abline(h = seq(0,1,0.05), v = seq(0,1,0.05), col = "lightgray", lty=3)
abline(a=0, b=1, col = "gray60")

plot(perf1,col="darkorange",lty=1, lwd=2,type="l",add=TRUE)

plot(perf2,col="tomato1",lty=1, lwd=2,type="l",add=TRUE)

plot(perf3,col="dodgerblue",lty=1, lwd=2,type="l",add=TRUE)

plot(perf4,col="turquoise2",lty=1, lwd=2,type="l",add=TRUE)

plot(perf11,col="dodgerblue",lty=1, lwd=2,type="l",add=TRUE)
plot(perf5,col="yellow",lty=1, lwd=2,type="l",add=TRUE)
plot(perf6,col="green",lty=1, lwd=2,type="l",add=TRUE)
plot(perf9,col="gray",lty=1, lwd=2,type="l",add=TRUE)
plot(perf8,col="pink",lty=1, lwd=2,type="l",add=TRUE)
plot(perf0,col="black",lty=1, lwd=2,type="l",add=TRUE)

auc1 <- performance(pred1,"auc")
auc1 <- unlist(slot(auc1, "y.values"))

auc2 <- performance(pred2,"auc")
auc2 <- unlist(slot(auc2, "y.values"))

auc3 <- performance(pred3,"auc")
auc3 <- unlist(slot(auc3, "y.values"))

auc4 <- performance(pred4,"auc")
auc4 <- unlist(slot(auc4, "y.values"))

auc11 <- performance(pred11,"auc")
auc11 <- unlist(slot(auc11, "y.values"))

auc5 <- performance(pred5,"auc")
auc5 <- unlist(slot(auc5, "y.values"))

auc6 <- performance(pred6,"auc")
auc6 <- unlist(slot(auc6, "y.values"))

auc9 <- performance(pred9,"auc")
auc9 <- unlist(slot(auc9, "y.values"))

auc8 <- performance(pred8,"auc")
auc8 <- unlist(slot(auc8, "y.values"))

auc0 <- performance(pred0,"auc")
auc0 <- unlist(slot(auc0, "y.values"))
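
The same two-line AUC extraction is repeated for every model. A small helper, sketched on the assumption that each pred* object is a ROCR prediction (the name gini_of is illustrative):

# Gini = 2*AUC - 1, computed straight from a ROCR prediction object
gini_of <- function(pred_obj){
  auc <- unlist(slot(performance(pred_obj, "auc"), "y.values"))
  round((auc - 0.5) * 2, digits = 5)
}

gini_of(pred1)   # same value as Gini_L1 in the legend below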

leg <- c(paste0("Gini_L1 = ", round((auc1-0.5)*2, digits = 5)),
         # paste0("Gini_L2 = ", round((auc2-0.5)*2, digits = 4), "\n"),
         # paste0("Gini_RF1 = ", round((auc3-0.5)*2, digits = 4), "\n"),
         # paste0("Gini_RF2 = ", round((auc4-0.5)*2, digits = 5), "\n"),
         paste0("Gini_L11 = ", round((auc11-0.5)*2, digits = 5)),
         paste0("Gini_RF10 = ", round((auc5-0.5)*2, digits = 5)),
         paste0("Gini_RF30 = ", round((auc6-0.5)*2, digits = 5)),
         paste0("Gini_Ct = ", round((auc9-0.5)*2, digits = 5)),
         paste0("Gini_Ct2 = ", round((auc8-0.5)*2, digits = 5)),
         paste0("Gini_All = ", round((auc0-0.5)*2, digits = 5)))

legend(0.4, 0.5, cex = 0.8, bty = "n", lwd = 2, lty = c(1,1),
       col = c("darkorange","dodgerblue","yellow","green","gray","pink","black"),
       legend = leg)

Confusion matrix

library(e1071)
library(caret)

# Predicted class at a 0.3 cutoff vs. the observed outcome on the hold-out sample.
# testp11 was produced on test.cat3, so the reference comes from the same split,
# recoded the same way as the model's training target.
confusionMatrix(data      = factor(ifelse(testp11 > 0.3, 1, 0), levels = c(0, 1)),
                reference = factor(ifelse(test.cat3$bd3 == 1, 0, 1), levels = c(0, 1)),
                positive  = "1")
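
The 0.3 cutoff is a judgment call. A small sketch for comparing a few cutoffs on the same hold-out predictions (the threshold grid is illustrative, and the reference is recoded the same way as the model's training target):

# Overall accuracy, sensitivity and specificity at a few candidate cutoffs
for (thr in c(0.2, 0.3, 0.4, 0.5)) {
  cm <- confusionMatrix(data      = factor(ifelse(testp11 > thr, 1, 0), levels = c(0, 1)),
                        reference = factor(ifelse(test.cat3$bd3 == 1, 0, 1), levels = c(0, 1)),
                        positive  = "1")
  cat(sprintf("cutoff %.1f  acc %.3f  sens %.3f  spec %.3f\n",
              thr,
              cm$overall["Accuracy"],
              cm$byClass["Sensitivity"],
              cm$byClass["Specificity"]))
}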