Age decision tree

# Age decision tree: data preparation.
# Loads the train and test CSV files, stacks them into `combi`, derives a
# `Title` factor from the passenger names, and fills the missing `Embarked`
# and `Fare` values.

library("rpart")
library("rpart.plot")
# library("rattle")

# Load data ----
train <- read.csv("../input/train.csv")
test <- read.csv("../input/test.csv")

# Give the test set a placeholder Survived column so that both data frames
# have the same columns and can be stacked with rbind().
test$Survived <- 0
combi <- rbind(train, test)

# Title feature ----
# strsplit() needs character input (Name may have been read as a factor).
combi$Name <- as.character(combi$Name)

# Step-by-step illustration on the first name: split on comma/period,
# unwrap the list element, then pick the second piece.
strsplit(combi$Name[1], split = "[,.]")
strsplit(combi$Name[1], split = "[,.]")[[1]]
strsplit(combi$Name[1], split = "[,.]")[[1]][2]

# Apply the same extraction to every name. vapply() guarantees a character
# result of length one per name, unlike sapply().
combi$Title <- vapply(
  combi$Name,
  function(x) strsplit(x, split = "[,.]")[[1]][2],
  character(1)
)

# Remove the first space only (sub(), not gsub()), so multi-word titles such
# as "the Countess" keep their inner space.
combi$Title <- sub(" ", "", combi$Title)

# Collapse rare titles into a few groups.
combi$Title[combi$Title %in% c("Mme", "Mlle")] <- "Mlle"
combi$Title[combi$Title %in% c("Capt", "Don", "Major", "Sir")] <- "Sir"
combi$Title[
  combi$Title %in% c("Dona", "Lady", "the Countess", "Jonkheer")
] <- "Lady"
combi$Title <- factor(combi$Title)

# Missing values ----
# Some passengers (rows 62 and 830 in the original data) do not have a value
# for embarkment. Since many passengers embarked at Southampton, we give them
# the value S. The rows are detected rather than hard-coded so the fix still
# works if the row order changes.
# We code all embarkment codes as factors.
missing_port <- is.na(combi$Embarked) | combi$Embarked == ""
combi$Embarked[missing_port] <- "S"
combi$Embarked <- factor(combi$Embarked)

# A passenger (row 1044 in the original data) has an NA Fare value.
# Let's replace it with the median fare value.
combi$Fare[is.na(combi$Fare)] <- median(combi$Fare, na.rm = TRUE)

# How to fill…


Link to Full Article: Age decision tree