Created
October 20, 2017 13:53
-
-
Save vrajesh26/a1f1af6d83a2e9eab1ec19c253a0b0ec to your computer and use it in GitHub Desktop.
Identify the customer segments that are eligible for a loan
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---- Load the training and test sets --------------------------------------
# Empty strings, single blanks and the literal text "NA" are read as missing.
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() keeps
# text columns as character, while the rest of this script calls levels() on
# them and fits classification models that need factor columns.
train <- read.csv(
  "D:/loan prediction/loan_train.csv",
  na.strings = c("", " ", "NA"),
  stringsAsFactors = TRUE
)
test <- read.csv(
  "D:/loan prediction/loan_test.csv",
  na.strings = c("", " ", "NA"),
  stringsAsFactors = TRUE
)

# The data viewer only makes sense in an interactive session.
if (interactive()) {
  View(train)
}

# ---- First look: missing values and column summaries ----------------------
colSums(is.na(train))
colSums(is.na(test))
dim(train)

library(mlr)
summarizeColumns(train)
# The original summarised `train` twice; the second call is presumably meant
# for `test`, mirroring the train/test pairs used everywhere else.
summarizeColumns(test)
# ---- Exploratory analysis --------------------------------------------------
# For each predictor: a plot on train split by Loan_Status, the same plot on
# test (no outcome available there) and, for categorical predictors, a
# chi-squared test of the predictor against Loan_Status.
library(ggplot2)

# Outcome distribution
ggplot(train, aes(x = Loan_Status, fill = Loan_Status)) +
  geom_bar()
prop.table(table(train$Loan_Status))

# Gender
ggplot(train, aes(x = Gender, fill = Loan_Status)) +
  geom_bar()
ggplot(test, aes(x = Gender)) +
  geom_bar()
gl <- table(train$Gender, train$Loan_Status)
chisq.test(gl)

# Married
ggplot(train, aes(x = Married, fill = Loan_Status)) +
  geom_bar()
ggplot(test, aes(x = Married)) +
  geom_bar()
ml <- table(train$Married, train$Loan_Status)
chisq.test(ml)

# Dependents
ggplot(train, aes(x = Dependents, fill = Loan_Status)) +
  geom_bar()
ggplot(test, aes(x = Dependents)) +
  geom_bar()
levels(train$Dependents)
dl <- table(train$Dependents, train$Loan_Status)
chisq.test(dl)

# Education
ggplot(train, aes(x = Education, fill = Loan_Status)) +
  geom_bar()
ggplot(test, aes(x = Education)) +
  geom_bar()
el <- table(train$Education, train$Loan_Status)
chisq.test(el)

# Self_Employed
ggplot(train, aes(x = Self_Employed, fill = Loan_Status)) +
  geom_bar()
ggplot(test, aes(x = Self_Employed)) +
  geom_bar()
sl <- table(train$Self_Employed, train$Loan_Status)
chisq.test(sl)

# Numeric predictors: histograms with the default binning
ggplot(train, aes(x = ApplicantIncome, fill = Loan_Status)) +
  geom_histogram()
ggplot(test, aes(x = ApplicantIncome)) +
  geom_histogram()

ggplot(train, aes(x = CoapplicantIncome, fill = Loan_Status)) +
  geom_histogram()
ggplot(test, aes(x = CoapplicantIncome)) +
  geom_histogram()

ggplot(train, aes(x = LoanAmount, fill = Loan_Status)) +
  geom_histogram()
ggplot(test, aes(x = LoanAmount)) +
  geom_histogram()

summary(train$Loan_Amount_Term)
ggplot(train, aes(x = Loan_Amount_Term, fill = Loan_Status)) +
  geom_histogram()

# Credit_History is converted to a factor in both data sets, so from here on
# it is treated as categorical.
summary(train$Credit_History)
train$Credit_History <- as.factor(train$Credit_History)
test$Credit_History <- as.factor(test$Credit_History)
levels(train$Credit_History)
ggplot(train, aes(x = Credit_History, fill = Loan_Status)) +
  geom_bar()
ggplot(test, aes(x = Credit_History)) +
  geom_bar()
cl <- table(train$Credit_History, train$Loan_Status)
chisq.test(cl)

# Property_Area
ggplot(train, aes(x = Property_Area, fill = Loan_Status)) +
  geom_bar()
ggplot(test, aes(x = Property_Area)) +
  geom_bar()
pl <- table(train$Property_Area, train$Loan_Status)
chisq.test(pl)

# Per-column overview of where the missing values are
summary(is.na(train))
# ---- Stack train and test, then fill gaps with simple rules ---------------
# Columns 2:12 of both frames are stacked so every imputation below sees all
# rows; column 1 and anything past column 12 are left out.
loan <- rbind(train[, 2:12], test[, 2:12])
colSums(is.na(loan))

# Feature
loan$TotalIncome <- loan$ApplicantIncome + loan$CoapplicantIncome

# Married: a missing value with zero co-applicant income becomes "No",
# every other missing value becomes "Yes".
no_coapplicant_income <- loan$CoapplicantIncome == 0
loan$Married[is.na(loan$Married) & no_coapplicant_income] <- "No"
loan$Married[is.na(loan$Married)] <- "Yes"

# Rows missing both Gender and Dependents: show them, then set Gender.
gender_and_dep_missing <- is.na(loan$Gender) & is.na(loan$Dependents)
loan[gender_and_dep_missing, ]
loan$Gender[gender_and_dep_missing] <- "Male"

# Unmarried applicants with unknown Dependents are given "0".
is_unmarried <- loan$Married == "No"
loan$Dependents[is.na(loan$Dependents) & is_unmarried] <- "0"
# ---- Tree-based imputation: Dependents (married men), then Gender ---------
library("rpart")

# Married male applicants. %in% is used instead of == because Gender still
# contains NAs at this point: `NA == "Male"` is NA, and indexing a data frame
# with NA inserts an all-NA row. Those phantom rows would end up in `mmtest`
# and make the prediction vector longer than (and misaligned with) the cells
# it is written back into.
is_married_male <- loan$Gender %in% "Male" & loan$Married %in% "Yes"
mm <- loan[is_married_male, c(3, 6:9, 11)]
mmtrain <- mm[!is.na(mm$Dependents), ]
mmtest <- mm[is.na(mm$Dependents), ]

depFit <- rpart(Dependents ~ ., data = mmtrain, xval = 3)
# rpart.plot() lives in the rpart.plot package, which this script never
# attaches; call it through its namespace so the function is always found.
rpart.plot::rpart.plot(depFit)

# Write the predictions back into exactly the rows that produced `mmtest`.
dep_to_fill <- is.na(loan$Dependents) & is_married_male
stopifnot(sum(dep_to_fill) == nrow(mmtest))
if (nrow(mmtest) > 0) {
  loan$Dependents[dep_to_fill] <-
    predict(depFit, newdata = mmtest, type = "class")
}

# Gender: fit on rows where it is known, using the first seven columns,
# then predict it for the rows where it is missing.
gtrain <- loan[!is.na(loan$Gender), 1:7]
gtest <- loan[is.na(loan$Gender), 1:7]
genFit <- rpart(Gender ~ ., data = gtrain, xval = 3)
rpart.plot::rpart.plot(genFit)
if (nrow(gtest) > 0) {
  loan$Gender[is.na(loan$Gender)] <-
    predict(genFit, newdata = gtest, type = "class")
}
# ---- Remaining imputations -------------------------------------------------
# Self_Employed: print the counts, then fill missing values with "No".
table(loan$Self_Employed)
loan$Self_Employed[is.na(loan$Self_Employed)] <- "No"

# Credit_History: missing values get their own code, 2.
library(car)
loan$Credit_History <- recode(loan$Credit_History, "NA=2")

# LoanAmount: Gaussian GLM fitted on rows with a known amount below 500,
# then used to predict the rows where the amount is missing.
amount_model_cols <- c(1:8, 10)
amount_known <- !is.na(loan$LoanAmount)
ltrain <- loan[amount_known & loan$LoanAmount < 500, amount_model_cols]
ltest <- loan[!amount_known, amount_model_cols]
loanFit <- glm(LoanAmount ~ ., data = ltrain, na.action = na.exclude)
loan$LoanAmount[!amount_known] <- predict(loanFit, newdata = ltest)

# Loan_Amount_Term: treat as categorical and fill missing values with "360".
loan$Loan_Amount_Term <- as.factor(loan$Loan_Amount_Term)
loan$Loan_Amount_Term[is.na(loan$Loan_Amount_Term)] <- "360"
# ---- Derived features ------------------------------------------------------
# Dependents as a number; the open-ended "3+" level is counted as 3.
numDependents <- recode(loan$Dependents, "'3+'='3' ")
numDependents <- as.numeric(as.character(numDependents))

# Family size = dependents + applicant (+ partner). A partner is assumed when
# there is co-applicant income or the applicant is married. Married holds
# "Yes"/"No", so the original comparison against "Y" was never TRUE.
has_partner <- loan$CoapplicantIncome > 0 | loan$Married == "Yes"
loan$FamilySize <- ifelse(has_partner, numDependents + 2, numDependents + 1)

loan$IncomePC <- loan$TotalIncome / loan$FamilySize
loan$LoanAmountByTotInc <- loan$LoanAmount / loan$TotalIncome
loan$LoanAmountPC <- loan$LoanAmount / loan$IncomePC

# Loan_Amount_Term is a factor by now; use a numeric copy for the arithmetic
# and leave the column itself as a factor.
term_numeric <- as.numeric(as.character(loan$Loan_Amount_Term))
loan$LoanPerMonth <- loan$LoanAmount / term_numeric
# The original spelled this column "LoanPerMOnthByTotInc", so the cor() call
# below read a column that did not exist.
loan$LoanPerMonthByTotInc <- loan$LoanPerMonth / loan$TotalIncome
# NOTE(review): dividing by LoanAmountPC gives IncomePC / term; IncomePC may
# have been the intended denominator. Kept as in the original; please confirm.
loan$LoanPerMonthPC <- loan$LoanPerMonth / loan$LoanAmountPC

# ---- Correlation check -----------------------------------------------------
cor(loan$LoanPerMonthByTotInc, loan$LoanPerMonth)
# The original called cor() on `newtrain`, which is only created further
# down; the training rows of `loan` are used here instead.
train_rows <- seq_len(nrow(train))
numeric_cols <- vapply(loan, is.numeric, logical(1))
cor(loan[train_rows, numeric_cols])

# Drop LoanPerMonthByTotInc (column 18, which the original removed by
# position) so the remaining column positions stay as before.
loan$LoanPerMonthByTotInc <- NULL
# ---- Split the combined frame back into train and test --------------------
nrow(loan)
nrow(train)
nrow(test)

# Row counts are taken from the data rather than hard-coded (the original
# used 1:614 and 615:981), so the split follows the input files.
n_train <- nrow(train)
n_test <- nrow(test)
stopifnot(nrow(loan) == n_train + n_test)

newtrain <- cbind(loan[seq_len(n_train), ], Loan_Status = train$Loan_Status)
if (interactive()) {
  View(newtrain)
}
newtest <- loan[n_train + seq_len(n_test), ]

# ---- Fit models ------------------------------------------------------------
library("caret")

# Resampling and random forests are random; fix the seed so runs repeat.
set.seed(2017)

# One control object serves both models (the original built it twice).
# No `repeats` is given, so caret's default for "repeatedcv" applies.
cntrl <- caret::trainControl(method = "repeatedcv", number = 5)

# Columns 6, 17 and 19 are excluded from the predictors; column 19 is the
# Loan_Status outcome appended above.
predictors <- newtrain[, -c(6, 17, 19)]
outcome <- newtrain[, 19]

# mlr (attached earlier) also exports train(); name caret's explicitly.
rp <- caret::train(
  x = predictors, y = outcome,
  method = "rpart", trControl = cntrl
)
rf1 <- caret::train(
  x = predictors, y = outcome,
  method = "rf", trControl = cntrl
)

# ---- Predict and write the submission --------------------------------------
pred_rf <- predict(rf1, newdata = newtest)
adg <- data.frame(Loan_ID = test[, 1], Loan_Status = pred_rf)
# row.names = FALSE keeps the file to the two columns built above instead of
# adding an unnamed row-number column.
write.csv(adg, file = "loan_output.csv", row.names = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment