RandomForest – 01

# Titanic data preparation: load the train/test CSVs, merge them, convert
# categorical columns to factors, derive a Title feature from the passenger
# names, impute missing values with mice, and split back into train/test.

library(mice)

# Load datasets ----
titanic_train <- read.csv("../input/train.csv", stringsAsFactors = FALSE)
titanic_test <- read.csv("../input/test.csv", stringsAsFactors = FALSE)
# summary(titanic_test)
# summary(titanic_train)

# Merge both datasets for imputation of missing values ----
# The test set gets an all-NA Survived column so both frames have the same
# columns; rbind() on data frames matches columns by name, so the differing
# column order is not a problem. The NA also marks test rows for the split
# at the end of this script.
titanic_test$Survived <- NA
titanic_all <- rbind(titanic_test, titanic_train)

# Convert to factors ----
titanic_all$Pclass <- factor(titanic_all$Pclass)
# Labels must be listed in the same order as the levels (S, C, Q).
# Values outside the listed levels (e.g. empty strings) become NA and are
# filled in by the imputation step below.
titanic_all$Embarked <- factor(
  titanic_all$Embarked,
  levels = c("S", "C", "Q"),
  labels = c("Southampton", "Cherbourg", "Queenstown")
)
titanic_all$Sex <- factor(titanic_all$Sex)
titanic_all$Survived <- factor(
  titanic_all$Survived,
  levels = c(0, 1),
  labels = c("no", "yes")
)

# Extract titles ----

# Extract the title from passenger names.
#
# The title is the first run of word characters that is immediately followed
# by a period (e.g. "Mr." in "Braund, Mr. Owen Harris"); the period is
# stripped from the result.
#
# x: character vector of names (a single name also works).
# Returns a character vector the same length as x; NA where no title is found,
# so the result can always be assigned back as a column.
extractTitle <- function(x) {
  matches <- regmatches(x, regexec("\\w+\\.", x))
  title <- vapply(
    matches,
    function(m) if (length(m) > 0) m[1] else NA_character_,
    character(1)
  )
  gsub("[.]", "", title)
}

titanic_all$Title <- factor(extractTitle(titanic_all$Name))
# summary(titanic_all)

# Missing values at this point:
#   Age:      263 NAs
#   Fare:     1 NA
#   Embarked: 2

# Impute missing values with mice() ----
selected_cols <- c("Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked")
# A fixed seed makes the (stochastic) imputation reproducible between runs.
imputation <- complete(mice(titanic_all[selected_cols], seed = 1))
titanic_all[selected_cols] <- imputation

# Split in train and test datasets again ----
# Only the test rows have Survived == NA (set before the merge).
titanic_train <- titanic_all[!is.na(titanic_all$Survived), ]
titanic_test <- titanic_all[is.na(titanic_all$Survived), ]
# …


Link to Full Article: RandomForest – 01

Pin It on Pinterest

Share This

Join Our Newsletter

Sign up to our mailing list to receive the latest news and updates about homeAI.info and the Informed.AI Network of AI-related websites, which includes Events.AI, Neurons.AI, Awards.AI, and Vocation.AI.

You have Successfully Subscribed!