Big ship, Bigger iceberg!

# -*- coding: utf-8 -*-
"""
-----------------------------------------------------------------------------
Kaggle Titanic Exploration

Author: Raj Saha
-----------------------------------------------------------------------------
"""
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
import os
import matplotlib.pylab as plt

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20. Prefer the
# current location and fall back only on legacy installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import sklearn.metrics
from sklearn import preprocessing
import seaborn as sns

# Local (non-Kaggle) working directory, kept for reference.
# NOTE(review): the path separators appear to have been lost in this copy;
# presumably this was C:\SparkCourse\kaggle\titanic - confirm before enabling.
# os.chdir("C:SparkCoursekaggletitanic")

"""
Data Engineering and Analysis
"""
# Load the dataset. The "../input/" paths are the ones in use; the bare
# file names are the alternative for running from a local directory.
# train_data = pd.read_csv("train.csv")
train_data = pd.read_csv("../input/train.csv")
# test_data = pd.read_csv("test.csv")
test_data = pd.read_csv("../input/test.csv")

"""
Data Transformations

Let us do the following transformations
1. Convert Date into separate columns - year, month, week
2. Convert all non numeric data to numeric
"""
# NOTE(review): no date column is handled in the visible part of this script;
# confirm whether item 1 above still applies.

# Dropping unnecessary columns. The list is defined once so the train and
# test frames are guaranteed to lose exactly the same columns.
DROPPED_COLUMNS = ['Name', 'Ticket', 'Embarked', 'Cabin']
train_data = train_data.drop(DROPPED_COLUMNS, axis=1)
test_data = test_data.drop(DROPPED_COLUMNS, axis=1)

# Bare expression: displays the remaining column dtypes when run in a
# notebook / interactive cell; it has no effect when run as a plain script.
test_data.dtypes

# Check if Age has Null values - either sum or any…

