Final Submission

library(randomForest)
library(rpart)

train <- read.csv("../input/train.csv")
test <- read.csv("../input/test.csv")

#' Extract the title token from one or more name strings.
#'
#' Each name is split on commas and periods; the second piece is taken and
#' every space is removed from it.
#'
#' @param name A name (character or factor); vectors are handled elementwise.
#' @return A character vector the same length as `name`. An element is `NA`
#'   when the name yields fewer than two pieces.
extract_title <- function(name) {
  pieces <- strsplit(as.character(name), split = "[,.]")
  title <- vapply(pieces, function(parts) parts[2], character(1))
  gsub(" ", "", title)
}

#' Most frequent value(s) of a column.
#'
#' @param column A vector; values are tabulated with `table()`.
#' @return A character vector holding the value(s) with the highest count
#'   (more than one on ties), or `character(0)` when nothing can be tabulated.
get_mode <- function(column) {
  counts <- table(as.vector(column))
  if (length(counts) == 0L) {
    return(character(0))
  }
  # Return the value visibly; ending on an assignment would hide it at the
  # console.
  names(counts)[counts == max(counts)]
}

#' Fill missing `Age` values with predictions from a regression tree.
#'
#' Fits an `rpart` tree (method "anova") on the rows where `Age` is present,
#' using Title, Pclass, Fare, FamilySize, SibSp and Parch as predictors, then
#' predicts `Age` for the rows where it is missing.
#'
#' @param all A data frame containing `Age` and the predictor columns above.
#' @return `all` with the missing `Age` entries replaced by predictions.
fill_age <- function(all) {
  missing_age <- is.na(all$Age)
  if (!any(missing_age)) {
    # Nothing to impute, so skip fitting the tree.
    return(all)
  }
  # ctl = rpart.control(minsplit=30)
  tree <- rpart(
    formula = Age ~ Title + Pclass + Fare + FamilySize + SibSp + Parch,
    data = all[!missing_age, ],
    method = "anova"
  )
  all$Age[missing_age] <- predict(tree, all[missing_age, ])
  all
}

# Merge the two datasets to make feature engineering easier.
# If done separately there would be differing levels of some factors,
# which would then need to be unified later.
test$Survived <- NA
merged <- rbind(train, test)

merged$Title <- vapply(
  merged$Name,
  FUN = extract_title,
  FUN.VALUE = character(1),
  USE.NAMES = FALSE
)
# The + 1 presumably counts the person themselves -- TODO confirm.
merged$FamilySize <- merged$SibSp + merged$Parch + 1

# Combine some titles that mean the same things in different languages, or
# imply the same status.
# NOTE(review): the excerpt is truncated at this point; the right-hand side of
# the assignment below is not visible, so the statement is left commented out.
# merged$Title[merged$Title == "Mlle"] <- ...


Link to Full Article: Final Submission

Pin It on Pinterest

Share This

Join Our Newsletter

Sign up to our mailing list to receive the latest news and updates about homeAI.info and the Informed.AI Network of AI-related websites, which includes Events.AI, Neurons.AI, Awards.AI, and Vocation.AI.

You have Successfully Subscribed!