Test1

# Titanic data preparation: load the train/test CSVs, handle missing values,
# and build patsy design matrices for the downstream classifiers.
import numpy as np
import pandas as pd
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier
# NOTE(review): `sklearn.grid_search` and `sklearn.preprocessing.Imputer` only
# exist in old scikit-learn releases; newer releases expose
# `sklearn.model_selection.GridSearchCV` and `sklearn.impute.SimpleImputer`.
# The original import paths are kept because the rest of the script is not
# visible here -- confirm the target scikit-learn version before changing them.
from sklearn.grid_search import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, Imputer
from patsy import dmatrices, dmatrix

# Print you can execute arbitrary python code
df_train = pd.read_csv("../input/train.csv", dtype={"Age": np.float64}, )
df_test = pd.read_csv("../input/test.csv", dtype={"Age": np.float64}, )

# Drop NaNs
# NOTE(review): 'Age' is part of this subset, so every training row with a
# missing Age is removed here and the training-side imputation below has
# nothing left to fill. Confirm which of the two behaviours is intended.
df_train.dropna(
    subset=['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'],
    inplace=True,
)

# print("\n\nSummary statistics of training data")
# print(df_train.describe())

# Age imputation: fill missing ages with the median of the non-missing ages
# of the same data frame.
df_train.loc[df_train['Age'].isnull(), 'Age'] = np.nanmedian(df_train['Age'])
df_test.loc[df_test['Age'].isnull(), 'Age'] = np.nanmedian(df_test['Age'])

# Training/testing array creation
# `dmatrices` returns the response matrix first and the predictor matrix
# second; the test set has no 'Survived' column, so only the right-hand side
# of the formula is built for it.
y_train, X_train = dmatrices('Survived ~ Age + Sex + Pclass + SibSp + Parch + Embarked', df_train)
X_test = dmatrix('Age + Sex + Pclass + SibSp + Parch + Embarked', df_test)

# …


Link to Full Article: Test1