test(3)

# Titanic data preparation: load the train/test CSVs, impute missing values,
# encode the categorical columns as integers and derive a few extra features.
import re

import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression

# Integer codes written in place of the original string categories.
_SEX_CODES = {"male": 0, "female": 1}
_EMBARKED_CODES = {"S": 0, "C": 1, "Q": 2}

# A title is a run of letters preceded by a space and followed by a literal
# period (e.g. " Mr."). The period must be escaped; an unescaped "." would
# match any character and pick up surname fragments instead of the title.
_TITLE_PATTERN = re.compile(r" ([A-Za-z]+)\.")

train = pd.read_csv("../input/train.csv", dtype={"Age": np.float64})
test = pd.read_csv("../input/test.csv", dtype={"Age": np.float64})

print("\n\nTop of the training data:")
print(train.head())

print("\n\nSummary statistics of training data")
print(train.describe())

# train.to_csv("copy_of_the_training_data.csv", index=False)

for frame in (train, test):
    # Each frame is imputed with its own median, as in the original script.
    frame["Age"] = frame["Age"].fillna(frame["Age"].median())
    for label, code in _SEX_CODES.items():
        frame.loc[frame["Sex"] == label, "Sex"] = code

# Printed before the fill below, so a missing value shows up in the output.
print(train["Embarked"].unique())

for frame in (train, test):
    # Missing ports are filled with "S" before encoding.
    frame["Embarked"] = frame["Embarked"].fillna("S")
    for label, code in _EMBARKED_CODES.items():
        frame.loc[frame["Embarked"] == label, "Embarked"] = code

    frame["Fare"] = frame["Fare"].fillna(frame["Fare"].median())

    # Generating a family size column. Both operands must come from the same
    # frame: adding train["SibSp"] to test["Parch"] would align on the row
    # index and mix training rows into the test features.
    frame["FamilySize"] = frame["SibSp"] + frame["Parch"]

    frame["NameLength"] = frame["Name"].apply(len)


def get_title(name):
    """Return the title found in a passenger name.

    The title is the first run of ASCII letters that follows a space and is
    immediately followed by a period.

    Args:
        name: The full passenger name string.

    Returns:
        The title without the trailing period, or "" when no title is found.
    """
    title_search = _TITLE_PATTERN.search(name)
    # If the title exists, extract and return it.
    if title_search:
        return title_search.group(1)
    return ""


# Get all the titles…


Link to Full Article: test(3)