Getting Started with Titanic

# Titanic starter script: load the training data, fill in missing values,
# encode the categorical columns as numbers, and set up a linear model.
#
# NOTE(review): ``sklearn.cross_validation`` was removed in scikit-learn 0.20
# in favour of ``sklearn.model_selection`` (whose ``KFold`` has a different
# signature). The imports are kept exactly as written because the code that
# uses them is not visible in this excerpt -- confirm the targeted
# scikit-learn version before changing them.
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import KFold
from sklearn import cross_validation

titanic = pd.read_csv("../input/train.csv", dtype={"Age": np.float64})

# Preprocessing Data
# ==================

# Fill in missing values in "Age" with the column median.
titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())

# Replace all the occurrences of male with the number 0 and female with 1.
titanic.loc[titanic["Sex"] == "male", "Sex"] = 0
titanic.loc[titanic["Sex"] == "female", "Sex"] = 1

# Convert the Embarked column: missing entries are treated as "S", then each
# port code is replaced by a number (S -> 0, C -> 1, Q -> 2).
titanic["Embarked"] = titanic["Embarked"].fillna("S")
titanic.loc[titanic["Embarked"] == "S", "Embarked"] = 0
titanic.loc[titanic["Embarked"] == "C", "Embarked"] = 1
titanic.loc[titanic["Embarked"] == "Q", "Embarked"] = 2

# The columns we'll use to predict the target
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]

# Linear Regression
# =================
alg = LinearRegression()

# Generate cross validation folds for the titanic dataset. It…


Link to Full Article: Getting Started with Titanic