Decision Tree Survivors

import numpy as np import pandas as pd train = pd.read_csv(“../input/train.csv”, dtype={“Age”: np.float64}, ) test = pd.read_csv(“../input/test.csv”, dtype={“Age”: np.float64}, ) def harmonize_data(titanic): titanic[“Age”] = titanic[“Age”].fillna(titanic[“Age”].mean()) titanic[“Age”].mean() titanic.loc[titanic[“Sex”] == “male”, “Sex”] = 1 titanic.loc[titanic[“Sex”] == “female”, “Sex”] = 0 titanic[“Embarked”] = titanic[“Embarked”].fillna(“S”) titanic.loc[titanic[“Embarked”] == “S”, “Embarked”] = 0 titanic.loc[titanic[“Embarked”] == “C”, “Embarked”] = 1 titanic.loc[titanic[“Embarked”] == “Q”, “Embarked”] = 2 titanic[“Fare”] = titanic[“Fare”].fillna(titanic[“Fare”].mean()) return titanic def create_submission(alg, train, test, predictors, filename): alg.fit(train[predictors], train[“Survived”]) predictions = alg.predict(test[predictors]) submission = pd.DataFrame({ “PassengerId”: test[“PassengerId”], “Survived”: predictions }) submission.to_csv(filename, index=False) train_data = harmonize_data(train) test_data = harmonize_data(test) from sklearn.tree import DecisionTreeClassifier from sklearn import cross_validation as cv predictors = [“Pclass”, “Sex”, “Age”, “SibSp”, “Parch”, “Fare”, “Embarked”] alg = DecisionTreeClassifier(max_depth=6) scores = cv.cross_val_score( alg, train_data[predictors], train_data[“Survived”], cv=5 ) print(scores.mean()) create_submission(alg, train_data, test_data, predictors, “dtsurvivors.csv”) This script has…


Link to Full Article: Decision Tree Survivors