Appendix B — Component Analysis Code (PCA and MCA)

B.1 Module and Data Imports

import pandas as pd, numpy as np, seaborn as sns, matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA

from sklearn.cluster import KMeans

from mpl_toolkits.mplot3d import Axes3D

from sklearn.preprocessing import OneHotEncoder

from prince import MCA

# Load the dataset used by the analyses below; the path is relative, so it is
# resolved against the current working directory.
fr = pd.read_csv('../data/final_clean.csv')

B.1.1 Principal Component Analysis

B.1.1.1 Select & Scale Numeric Columns

# Numeric features that feed the PCA.
# Candidate columns currently left out of the analysis: loan_amount,
# property_value, origination_costs, discount_points, lender_credits,
# debt_to_income_ratio.
columns = [
    'income', 'interest_rate', 'total_loan_costs', 'loan_to_value_ratio',
    'loan_term', 'intro_rate_period', 'total_units',
    'tract_minority_population_percent', 'tract_population',
    'tract_to_msa_income_percentage', 'tract_owner_occupied_units',
    'tract_one_to_four_family_homes', 'tract_median_age_of_housing_units',
]

# Standardise every selected column (StandardScaler defaults: centre on the
# mean, scale to unit variance) so that no feature dominates the components
# purely because of its units.
X = StandardScaler().fit_transform(fr[columns])

B.1.1.2 Perform 2D PCA

# Project the standardised features onto the first two principal components
# and attach the boolean outcome label for colouring.
pca2d = PCA(n_components=2)
result2d = pd.DataFrame(pca2d.fit_transform(X)).assign(
    outcome=fr['outcome'].astype(bool)
)

# Running total of the component eigenvalues (explained variance).
display(pca2d.explained_variance_.cumsum())

# Column 0 is the first component, column 1 the second.
sns.scatterplot(data=result2d, x=0, y=1, hue='outcome')

# Cumulative fraction of the total variance captured by the components.
pca2d.explained_variance_ratio_.cumsum()
array([2.71077093, 4.35879384])
array([0.20851998, 0.33529045])

B.1.1.3 Perform 3D PCA

# Project the standardised features onto the first three principal components
# and attach the boolean outcome label.
pca3d = PCA(n_components=3)
result3d = pd.DataFrame(pca3d.fit_transform(X)).assign(
    outcome=fr['outcome'].astype(bool)
)

# Running total of the component eigenvalues (explained variance).
display(pca3d.explained_variance_.cumsum())

# NOTE(review): bare expression; it only renders a table if it is the last
# statement of a notebook cell. Confirm against the original cell boundaries.
result3d

# Cumulative fraction of the total variance captured by the components.
pca3d.explained_variance_ratio_.cumsum()
array([2.71077093, 4.35879384, 5.61886437])
array([0.20851998, 0.33529045, 0.43221855])
# 3-D scatter of the first three principal components, coloured by outcome.
fig = plt.figure(figsize=(12, 12))

# Let the figure create and register the 3-D axes. Constructing Axes3D(fig)
# directly and adding it by hand relies on the auto-add behaviour deprecated
# in Matplotlib 3.4 and emits a warning on 3.4-3.6; this form yields the same
# axes (same rect, elevation and azimuth) without it.
ax = fig.add_axes([0, 0, .9, 1], projection='3d', elev=5, azim=225)

# Columns 0, 1 and 2 hold the first, second and third component scores.
x = result3d[0]
y = result3d[1]
z = result3d[2]

# The outcome is cast to int so the colormap can map it to a colour.
ax.scatter(
    x, y, z,
    c=fr['outcome'].astype(int),
    cmap="RdYlGn",
    edgecolor='k',
    s=40,
)

plt.show()