Random Forest

# Titanic data preparation ahead of a random forest model: load the train and
# test sets, stack them into one data frame, and fill in the missing
# Embarked and Fare values.
library(randomForest)

# The train and test data is stored in the ../input directory
train <- read.csv("../input/train.csv")
test <- read.csv("../input/test.csv")

# Give test a placeholder Survived column (presumably absent from test.csv)
# so its columns line up with train for rbind().
test$Survived <- NA
all_data <- rbind(train, test)

# Passengers on rows 62 and 830 do not have a value for embarkment.
# Since many passengers embarked at Southampton, we give them the value S.
all_data$Embarked[c(62, 830)] <- "S"

# Factorize embarkment codes.
all_data$Embarked <- factor(all_data$Embarked)

# Passenger on row 1044 has an NA Fare value. Let's replace it with the
# median fare value.
all_data$Fare[1044] <- median(all_data$Fare, na.rm = TRUE)

# Family size = siblings/spouses + parents/children + the passenger.
all_data$family_size <- all_data$SibSp + all_data$Parch + 1

# How to fill in missing Age values?
# We make a prediction of a passenger's Age using the other variables and a
# decision tree model.
# This time you give method = "anova" since... (the excerpt is truncated
# here; the remainder of the script is in the full article)


Link to Full Article: Random Forest