PclassAgeSex.r

# This R script will run on our backend. You can write arbitrary code here!
# Many standard libraries are already installed, such as randomForest.
library(randomForest)
library(ggplot2)

# The train and test data is stored in the ../input directory.
# The seed is fixed before fitting so repeated runs build the same forest.
set.seed(1)
train <- read.csv("../input/train.csv", stringsAsFactors = FALSE)
test <- read.csv("../input/test.csv", stringsAsFactors = FALSE)

# Build the model inputs from a raw passenger data frame.
#
# data: a data frame containing at least the columns Pclass, Age and Sex.
#
# Returns a data frame with exactly those three columns, where missing ages
# are replaced by the sentinel value -1 and Sex is converted to a factor.
extractFeatures <- function(data) {
  features <- c("Pclass", "Age", "Sex")
  fea <- data[, features]
  # Mark missing ages with -1 so the Age column contains no NA values.
  fea$Age[is.na(fea$Age)] <- -1
  fea$Sex <- as.factor(fea$Sex)
  fea
}

# Fit a 100-tree forest with Survived as a factor response.
# importance = TRUE asks randomForest to record the predictor importance
# measures that are read back with importance() below.
rf <- randomForest(extractFeatures(train), as.factor(train$Survived),
                   ntree = 100, importance = TRUE)

# Predict on the test set and write one row per PassengerId.
submission <- data.frame(PassengerId = test$PassengerId)
submission$Survived <- predict(rf, extractFeatures(test))
write.csv(submission, file = "1_random_forest_r_submission.csv",
          row.names = FALSE)

# type = 1 selects the mean-decrease-in-accuracy measure
# (see the randomForest::importance documentation).
imp <- importance(rf, type = 1)
featureImportance <- data.frame(Feature = row.names(imp),
                                Importance = imp[, 1])

# Horizontal bar chart of the features, ordered by importance.
# The trailing "\n" in the title adds a blank line below it; the collapsed
# source showed this as "Importancen" (backslash lost), so it is restored here.
p <- ggplot(featureImportance,
            aes(x = reorder(Feature, Importance), y = Importance)) +
  geom_bar(stat = "identity", fill = "#53cfff") +
  coord_flip() +
  theme_light(base_size = 20) +
  xlab("") +
  ylab("Importance") +
  ggtitle("Random Forest Feature Importance\n") +
  theme(plot.title = element_text(size = 18))

ggsave("2_feature_importance.png", p)

# This script has been released under the Apache… (notice truncated in source)


Link to Full Article: PclassAgeSex.r