Titanic RandomForrest v1.2

# Titanic feature engineering for a random-forest model.
#
# Reads the train and test CSV files, stacks them into one data frame so every
# engineered feature is computed consistently, imputes missing values with
# mice, derives title / family / cabin features, converts the categorical
# columns to factors and finally splits the data back into train and test.

library(mice)
library(randomForest)

# Empty strings are treated as missing in addition to the literal "NA".
train <- read.csv("../input/train.csv", na.strings = c("NA", ""),
                  stringsAsFactors = FALSE)
test <- read.csv("../input/test.csv", na.strings = c("NA", ""),
                 stringsAsFactors = FALSE)

# Share of missing values in `x`, formatted as a percentage string
# (rounded to two decimals of a percent, e.g. "19.87%").
check.missing <- function(x) {
  paste0(round(sum(is.na(x)) / length(x), 4) * 100, "%")
}

data.frame(missing = vapply(train, check.missing, character(1)))
data.frame(missing = vapply(test, check.missing, character(1)))

# Combine train/test data for pre-processing ----
train$Cat <- "train"
test$Cat <- "test"
test$Survived <- NA
full <- rbind(train, test)

# Embarked: missing ports are filled with "S" ----
full$Embarked[is.na(full$Embarked)] <- "S"

# Extract Title from Name ----
# Names are split on "," and "."; the second piece is the title.
full$Title <- vapply(
  strsplit(full$Name, "[.,]"),
  function(parts) parts[2],
  character(1)
)
# Removes ALL spaces, so a multi-word title such as " the Countess" becomes
# "theCountess" (the commented-out grouping below would need that spelling).
full$Title <- gsub(" ", "", full$Title)
# full$Title[full$Title %in% c('Capt', 'Don', 'Major', 'Sir')] <- 'Sir'
# full$Title[full$Title %in% c('Dona', 'Lady', 'the Countess', 'Jonkheer')] <- 'Lady'

# Adding FamilySize: parents/children + siblings/spouses + the passenger ----
full$FamilySize <- full$Parch + full$SibSp + 1

# Perform imputation to remove NAs ----
# Survived is excluded so the target is never used or filled in by mice.
set.seed(144)
vars.for.imputation <- setdiff(names(full), "Survived")
imputed <- mice::complete(mice(full[vars.for.imputation]))
full[vars.for.imputation] <- imputed

# Adding Mother ----
# NOTE(review): the published listing lost the text between "full$Mother" and
# "0 & full$Age>18" (it was stripped as if it were an HTML tag). The
# initialisation to 0 and the `Sex == "female" & Parch > 0` conditions are
# restored here -- confirm against the original script.
full$Mother <- 0
full$Mother[full$Sex == "female" & full$Parch > 0 &
              full$Age > 18 & full$Title != "Miss"] <- 1

# Adding Child ----
# NOTE(review): same extraction damage as above; the initialisation to 0 and
# the `Parch > 0` condition are restored -- confirm against the original.
full$Child <- 0
full$Child[full$Parch > 0 & full$Age <= 18] <- 1

# FamilyId2 ----
# A family is identified by its size pasted to the surname; ids that occur at
# most twice are collapsed into the single level "Small".
Surname <- vapply(
  strsplit(full$Name, "[.,]"),
  function(parts) parts[1],
  character(1)
)
FamilyId <- paste0(full$FamilySize, Surname)
full$FamilyId <- factor(FamilyId)
Family <- data.frame(table(FamilyId))
SmallFamily <- Family$FamilyId[Family$Freq <= 2]
FamilyId[FamilyId %in% SmallFamily] <- "Small"
full$FamilyId2 <- factor(FamilyId)

# Extract Deck from Cabin number: the first character of the cabin string ----
full$Deck <- substr(full$Cabin, 1, 1)

# Extract Position from Cabin number ----
# The text following the first capital letter is taken as the cabin number;
# entries without a parsable number become NA and are left out of clustering.
full$CabinNum <- vapply(
  strsplit(full$Cabin, "[A-Z]"),
  function(parts) parts[2],
  character(1)
)
full$num <- as.numeric(full$CabinNum)
has.num <- !is.na(full$num)
Pos <- kmeans(full$num[has.num], 3)
full$CabinPos <- NA_integer_
full$CabinPos[has.num] <- Pos$cluster
full$CabinPos <- factor(full$CabinPos)
# NOTE(review): k-means cluster ids are arbitrary, so these labels are only
# nominal names for clusters 1, 2 and 3 -- they are not tied to the ordering
# of the cabin numbers.
levels(full$CabinPos) <- c("Front", "End", "Middle")
full$num <- NULL

# Convert categorical predictors to factors ----
full <- transform(
  full,
  Pclass = factor(Pclass),
  Sex = factor(Sex),
  Embarked = factor(Embarked),
  Title = factor(Title),
  Mother = factor(Mother),
  Child = factor(Child),
  FamilyId2 = factor(FamilyId2),
  Deck = factor(Deck)
)

# Split train/test data ----
train <- full[full$Cat == "train", ]
test <- full[full$Cat == "test", ]
train$Survived <- factor(train$Survived)
# rf.fit = randomForest(Survived…


Link to Full Article: Titanic RandomForrest v1.2

Pin It on Pinterest

Share This

Join Our Newsletter

Sign up to our mailing list to receive the latest news and updates about homeAI.info and the Informed.AI Network of AI-related websites, which includes Events.AI, Neurons.AI, Awards.AI, and Vocation.AI.

You have Successfully Subscribed!