pprueba1

# Titanic data preparation: load the train/test CSVs, derive FamilySize,
# coerce selected columns to factors, and split the training data in half
# for local model evaluation.
#
# NOTE: this script was recovered from a single collapsed line in which every
# statement after the first `#` had become part of one comment and string
# delimiters were typographic quotes; line breaks and ASCII quotes have been
# restored so the script parses.

library(caret)

# Read a Titanic CSV file and add a FamilySize column.
#
# file: path to a CSV file with a header row containing SibSp and Parch.
# Returns the data frame with FamilySize = SibSp + Parch appended.
loaddata <- function(file) {
  data <- read.csv(file, header = TRUE, stringsAsFactors = FALSE)
  # compute family size on dataset
  data$FamilySize <- data$SibSp + data$Parch
  data
}

data <- loaddata("../input/train.csv")
# load real test data
titanic_test <- loaddata("../input/test.csv")

# change survived from integer to boolean
data$Survived <- as.logical(data$Survived)
# NOTE(review): `levels<-` on a logical vector only attaches a "levels"
# attribute; it does not turn the column into a labelled factor. Kept as in
# the original because later code (not visible here) may compare Survived
# against logical predictions -- confirm the intent.
levels(data$Survived) <- c("Not survived", "Survived")

# make explicit factor levels for specific variables:
# 3=Pclass, 5=Sex, 12=Embarked
for (i in c(3, 5, 12)) {
  data[, i] <- as.factor(data[, i])
}

# break up training set into subset training and test set
set.seed(820)
inTrainingSet <- createDataPartition(data$Survived, p = 0.5, list = FALSE)
train <- data[inTrainingSet, ]
test <- data[-inTrainingSet, ]

# Fraction of predictions that match the observed outcome.
#
# test:  data frame with a Survived column.
# rpred: vector of predictions, compared element-wise with test$Survived.
# Returns sum(test$Survived == rpred) / length(rpred).
modelaccuracy <- function(test, rpred) {
  result_1 <- test$Survived == rpred
  sum(result_1) / length(rpred)
}

# TODO: the definition of `checkaccuracy` is garbled and truncated in the
# source this script was recovered from. The surviving fragment is preserved
# verbatim below (commented out) rather than guessed at; restore it from the
# full article before use.
#
# checkaccuracy bestaccuracy) { bestaccuracy <- accuracy
#   assign(“bestaccuracy”, accuracy, envir = .GlobalEnv)
#   label <- ‘better’ } else if (accuracy < bestaccuracy) {…


Link to Full Article: pprueba1