Final Submission

library(randomForest)
library(rpart)

train <- read.csv("../input/train.csv")
test <- read.csv("../input/test.csv")

# Extract the title from a single passenger name.
#
# The name is split on commas and periods and the second piece is taken as
# the title; every space is then removed from it (so a multi-word title is
# collapsed into one token).
#
# name: a single name (coerced with as.character()).
# Returns: a character string, or NA if the name has fewer than two pieces.
extract_title <- function(name) {
  pieces <- strsplit(as.character(name), split = "[,.]")[[1]]
  gsub(" ", "", pieces[2], fixed = TRUE)
}

# Return the most frequent value(s) of a column, as character.
#
# column: a vector (or factor) of values.
# Returns: the name(s) of the table entries with the highest count; more than
#   one value is returned when there is a tie.
get_mode <- function(column) {
  counts <- table(as.vector(column))
  # Return visibly (the value used to be returned invisibly, as the result of
  # an assignment).
  names(counts)[counts == max(counts)]
}

# Fill missing Age values with predictions from a regression tree.
#
# all: a data frame with the columns Age, Title, Pclass, Fare, FamilySize,
#   SibSp and Parch.
# Returns: `all` with every NA in Age replaced by the tree's prediction.
fill_age <- function(all) {
  missing_age <- is.na(all$Age)
  # ctl <- rpart.control(minsplit = 30)
  # Fit only on the rows where Age is known.
  tree <- rpart(
    formula = Age ~ Title + Pclass + Fare + FamilySize + SibSp + Parch,
    data = all[!missing_age, ],
    method = "anova"
  )
  all$Age[missing_age] <- predict(tree, all[missing_age, ])
  all
}

# Merge the two datasets to make feature engineering easier.
# If done separately there would be differing levels of some factors,
# which would then need to be unified later.
test$Survived <- NA
merged <- rbind(train, test)

# vapply() guarantees one character value per name (sapply() is not
# type-stable).
merged$Title <- vapply(
  as.character(merged$Name),
  extract_title,
  character(1),
  USE.NAMES = FALSE
)
merged$FamilySize <- merged$SibSp + merged$Parch + 1

# Combine some titles that mean the same things in different languages, or
# imply the same status.
# NOTE: the excerpt is truncated at this point; the right-hand side of the
# following assignment is not shown, so it is left commented out.
# merged$Title[merged$Title == 'Mlle'] <- …


Link to Full Article: Final Submission