TitanicExampleImproved

# Titanic example: load the Kaggle train/test CSVs, print a quick overview of
# the training data, and clean the "Age", "Sex" and "Embarked" columns so they
# are numeric before modelling.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# You can execute arbitrary Python code here.
# "Age" is read as float64 explicitly; missing ages are filled further down.
train = pd.read_csv("../input/train.csv", dtype={"Age": np.float64})
test = pd.read_csv("../input/test.csv", dtype={"Age": np.float64})

# Print to standard output, and see the results in the "log" section below
# after running your script.
print("\n\nTop of the training data:")
print(train.head())

print("\n\nSummary statistics of training data")
print(train.describe())

# Fill missing ages with the median age of the training set.
train["Age"] = train["Age"].fillna(train["Age"].median())

# Replace all the occurrences of "male" with the number 0 and of "female"
# with the number 1.
train.loc[train["Sex"] == "male", "Sex"] = 0
train.loc[train["Sex"] == "female", "Sex"] = 1

# Fill missing embarkation ports with "S", then encode the three ports as
# integers: S -> 0, C -> 1, Q -> 2.
train["Embarked"] = train["Embarked"].fillna("S")
train.loc[train["Embarked"] == "S", "Embarked"] = 0
train.loc[train["Embarked"] == "C", "Embarked"] = 1
train.loc[train["Embarked"] == "Q", "Embarked"] = 2

# Import the logistic regression class.
from sklearn.linear_model import LogisticRegression
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20 in
# favour of sklearn.model_selection. The import is kept as written because the
# remainder of the script is not visible here and presumably uses this name;
# migrate both together.
from sklearn import cross_validation

# The columns we'll use to predict the target…


Link to Full Article: TitanicExampleImproved