Random Forest Benchmark (R)

# This script trains a Random Forest model based on the data,
# saves a sample submission, and plots the relative importance
# of the variables in making predictions.
#
# Download 1_random_forest_r_submission.csv from the output below
# and submit it through
# https://www.kaggle.com/c/titanic-gettingStarted/submissions/attach
# to enter this getting started competition!

library(ggplot2)
library(randomForest)

# Fix the RNG seed so the forest (and therefore the submission) is reproducible.
set.seed(1)

train <- read.csv("../input/train.csv", stringsAsFactors = FALSE)
test <- read.csv("../input/test.csv", stringsAsFactors = FALSE)

# Build the model input from a raw passenger data frame.
#
# Args:
#   data: a data frame containing at least the columns Pclass, Age, Sex,
#         Parch, SibSp, Fare and Embarked.
#
# Returns:
#   A data frame restricted to those seven columns, where
#     - missing Age values are replaced with the sentinel -1,
#     - missing Fare values are replaced with the median of the non-missing
#       fares in `data` itself,
#     - empty Embarked strings are replaced with "S",
#     - Sex and Embarked are converted to factors.
extractFeatures <- function(data) {
  features <- c("Pclass", "Age", "Sex", "Parch", "SibSp", "Fare", "Embarked")
  fea <- data[, features]
  fea$Age[is.na(fea$Age)] <- -1
  fea$Fare[is.na(fea$Fare)] <- median(fea$Fare, na.rm = TRUE)
  fea$Embarked[fea$Embarked == ""] <- "S"
  fea$Sex <- as.factor(fea$Sex)
  fea$Embarked <- as.factor(fea$Embarked)
  fea
}

# The response is passed as a factor so randomForest performs classification;
# importance = TRUE is required for the permutation importance used below.
rf <- randomForest(
  extractFeatures(train),
  as.factor(train$Survived),
  ntree = 100,
  importance = TRUE
)

# Predict on the test set and write the submission file.
submission <- data.frame(PassengerId = test$PassengerId)
submission$Survived <- predict(rf, extractFeatures(test))
write.csv(
  submission,
  file = "1_random_forest_r_submission.csv",
  row.names = FALSE
)

# type = 1 selects the mean-decrease-in-accuracy importance measure.
imp <- importance(rf, type = 1)
featureImportance <- data.frame(Feature = row.names(imp), Importance = imp[, 1])

# NOTE(review): the article excerpt is truncated in the middle of the plotting
# statement. The visible fragment is preserved below, commented out, because
# the remainder of the call is not available here:
#
# p <- ggplot(featureImportance,
#             aes(x = reorder(Feature, Importance), y = Importance)) +
#   geom_bar(stat = "identity", …


Link to Full Article: Random Forest Benchmark (R)

Pin It on Pinterest

Share This

Join Our Newsletter

Sign up to our mailing list to receive the latest news and updates about homeAI.info and the Informed.AI Network of AI-related websites, which includes Events.AI, Neurons.AI, Awards.AI, and Vocation.AI.

You have Successfully Subscribed!