Decision Tree Survivors

# Decision-tree survival classifier.
#
# Reads ../input/train.csv and ../input/test.csv, cleans both frames with
# harmonize_data(), cross-validates a depth-6 DecisionTreeClassifier on the
# training frame, and writes the test-set predictions to dtsurvivors.csv.

import numpy as np
import pandas as pd

# Integer codes substituted for the categorical text values.
SEX_CODES = {"male": 1, "female": 0}
EMBARKED_CODES = {"S": 0, "C": 1, "Q": 2}


def _encode(column, codes):
    """Return *column* with every value found in *codes* replaced by its code.

    Values that are not keys of *codes* (including values that were already
    encoded by an earlier call) are passed through unchanged, so applying the
    encoding twice gives the same result as applying it once.
    """
    return column.map(lambda value: codes.get(value, value))


def harmonize_data(titanic):
    """Fill missing values and encode the categorical columns of *titanic*.

    The frame is modified in place and also returned.

    - "Age" and "Fare": missing entries are replaced by the mean of that
      column in this same frame.
    - "Sex": "male" -> 1, "female" -> 0.
    - "Embarked": missing entries are treated as "S"; then "S" -> 0,
      "C" -> 1, "Q" -> 2.

    Args:
        titanic: DataFrame with "Age", "Sex", "Embarked" and "Fare" columns.

    Returns:
        The same DataFrame object that was passed in.
    """
    titanic["Age"] = titanic["Age"].fillna(titanic["Age"].mean())
    titanic["Sex"] = _encode(titanic["Sex"], SEX_CODES)
    titanic["Embarked"] = _encode(titanic["Embarked"].fillna("S"), EMBARKED_CODES)
    titanic["Fare"] = titanic["Fare"].fillna(titanic["Fare"].mean())
    return titanic


def create_submission(alg, train, test, predictors, filename):
    """Fit *alg* on *train*, predict on *test*, and write the result as CSV.

    Args:
        alg: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        train: DataFrame holding the *predictors* columns and "Survived".
        test: DataFrame holding the *predictors* columns and "PassengerId".
        predictors: List of column names used as features.
        filename: Destination path of the CSV file; it gets the two columns
            "PassengerId" and "Survived" and no index column.
    """
    alg.fit(train[predictors], train["Survived"])
    predictions = alg.predict(test[predictors])
    submission = pd.DataFrame({
        "PassengerId": test["PassengerId"],
        "Survived": predictions,
    })
    submission.to_csv(filename, index=False)


def main():
    """Run the pipeline: load, clean, cross-validate, write the submission."""
    # Imported here so the helpers above remain importable without
    # scikit-learn. `sklearn.cross_validation` no longer exists;
    # `cross_val_score` is provided by `sklearn.model_selection`.
    from sklearn.model_selection import cross_val_score
    from sklearn.tree import DecisionTreeClassifier

    train = pd.read_csv("../input/train.csv", dtype={"Age": np.float64})
    test = pd.read_csv("../input/test.csv", dtype={"Age": np.float64})

    train_data = harmonize_data(train)
    test_data = harmonize_data(test)

    predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
    alg = DecisionTreeClassifier(max_depth=6)

    scores = cross_val_score(
        alg,
        train_data[predictors],
        train_data["Survived"],
        cv=5,
    )
    print(scores.mean())

    create_submission(alg, train_data, test_data, predictors, "dtsurvivors.csv")


if __name__ == "__main__":
    main()

# This script has…


Link to Full Article: Decision Tree Survivors

Pin It on Pinterest

Share This

Join Our Newsletter

Sign up for our mailing list to receive the latest news and updates about homeAI.info and the Informed.AI Network of AI-related websites, which include Events.AI, Neurons.AI, Awards.AI, and Vocation.AI.

You have Successfully Subscribed!