borrowed1

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier


def _encode_labels(column, codes, missing_code):
    """Return a copy of *column* with known labels replaced by codes.

    Values equal to a key of ``codes`` become the mapped code, null values
    become ``missing_code``, and any other value is left unchanged.
    """
    # Work on a standalone object-dtype copy so that writing integer codes
    # never goes through chained indexing on the parent frame.
    encoded = column.astype(object)
    for label, code in codes.items():
        encoded[column == label] = code
    encoded[column.isnull()] = missing_code
    return encoded


def df_cleaner(df):
    """
    Clean up a few variables in the training/test sets.

    The frame is modified in place and also returned:

    * missing ``Age`` and ``Fare`` values are replaced by the mean of the
      non-missing values of the same column;
    * ``Sex`` is encoded as male -> 0, female -> 1, missing -> 2;
    * ``Embarked`` is encoded as S -> 0, C -> 1, Q -> 2, missing -> 3.

    ``df`` must provide the ``Age``, ``Fare``, ``Sex`` and ``Embarked``
    columns.
    """
    # Clean up ages and fares. The mean is computed once per column instead
    # of once per missing row; the filled values are the same.
    for column in ('Age', 'Fare'):
        df[column] = df[column].fillna(df[column].mean())

    # Manually convert values to numeric codes for clarity.
    # Change the sex to a binary column (plus a code for missing values).
    df['Sex'] = _encode_labels(df['Sex'], {'male': 0, 'female': 1}, 2)

    # Transform the port of embarkation to categorical codes.
    df['Embarked'] = _encode_labels(
        df['Embarked'], {'S': 0, 'C': 1, 'Q': 2}, 3
    )

    return df


def main():
    """
    Visualization of random forest accuracy as function of the number of
    trees available in the ensemble.
    """
    # Read…
    # NOTE(review): the excerpt is truncated here; the remainder of main()
    # is not part of this source.


Link to Full Article: borrowed1