Appendix C — Association Rule Mining Code

C.1 Module and Data Imports

import pandas as pd, numpy as np, seaborn as sns, matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA

from sklearn.cluster import KMeans

from mpl_toolkits.mplot3d import Axes3D

from mlxtend.frequent_patterns import apriori

from mlxtend.frequent_patterns import association_rules

C.2 Transform to Basket Transactions

# Bit-flag encodings for the multi-valued categorical columns.  Within each
# column the first label listed owns bit 0, the next one bit 1, and so on,
# which lets the basket-building loop test every label with a bitwise AND.
# The co-applicant race / ethnicity columns are deliberately not mapped (their
# entries were commented out); they used the same label lists as the
# applicant columns.
mapper = {
    column: {label: 1 << bit for bit, label in enumerate(labels)}
    for column, labels in (
        ('applicant_race', (
            'American Indian/Alaska Native',
            'Asian',
            'Asian Indian',
            'Chinese',
            'Filipino',
            'Japanese',
            'Korean',
            'Vietnamese',
            'Other Asian',
            'Black/African American',
            'Native Hawaiian/Pacific Islander',
            'Native Hawaiian',
            'Guamanian/Chamorro',
            'Samoan',
            'Other Pacific Islander',
            'White',
            'Information not provided',
            'Not Applicable',
            'No Co-applicant',
        )),
        ('applicant_ethnicity', (
            'Hispanic/Latino',
            'Mexican',
            'Puerto Rican',
            'Cuban',
            'Other Hispanic/Latino',
            'Not Hispanic/Latino',
            'Information Not Provided',
            'Not Applicable',
            'No Co-applicant',
        )),
        ('aus', (
            'Desktop Underwriter',
            'Loan Prospector/Product Advisor',
            'TOTAL Scorecard',
            'GUS',
            'Other',
            'Internal Proprietary',
            'Not applicable',
            'Exempt',
        )),
        ('denial_reason', (
            'DTI',
            'Employment History',
            'Credit History',
            'Collateral',
            'Insufficient Cash',
            'Unverifiable Information',
            'Credit Application Incomplete',
            'Mortgage Insurance Denied',
            'Other',
            'Not Applicable',
        )),
    )
}

# Reverse lookup of `mapper`: column -> {bit flag -> label}, so that a flag
# found set in a cell can be turned back into its readable label.
new_mapper = {
    column: {flag: label for label, flag in encoding.items()}
    for column, encoding in mapper.items()
}
# Load the cleaned records; the loop below overwrites the numeric columns
# named in pct_cols with their percentile band.
fr2 = pd.read_csv('../data/final_clean_r2.csv')

# Percentile cut points and the label of each of the five resulting bands.
pct = [20,40,60,80]
levels = ['0-20','21-40','41-60','61-80','>80']

# Numeric columns that are replaced by a percentile-band label.
pct_cols = [
    'income',
    'debt_to_income_ratio',
    'loan_to_value_ratio',
    'tract_minority_population_percent',
    'tract_to_msa_income_percentage',
    'tract_median_age_of_housing_units',
    'interest_rate'
]

for col in pct_cols:
    # One call returns all four cut points; the infinite outer edges make the
    # first and last band open-ended so that no value falls outside the bins.
    p = [-np.inf, *np.percentile(fr2[col], pct), np.inf]
    fr2[col] = pd.cut(fr2[col], bins=p, labels=levels)
# One list of item strings per record of fr2.
basket = []

# Column groupings (not used within this section).  The fifth entry of `bc`
# is spelled 'tract_to_msa_income_percentage' so that it matches the column
# binned through pct_cols; the earlier spelling without the trailing "age"
# would raise a KeyError as soon as `bc` was used to index fr2.
bc = [
    'income','debt_to_income_ratio','loan_to_value_ratio',
    'tract_minority_population_percent','tract_to_msa_income_percentage',
    'derived_sex'
]
b1c = ['interest_rate','company','applicant_race']
b2c = ['interest_rate','company','outcome']
b3c = ['interest_rate','applicant_race','outcome']

b1,b2,b3 = [],[],[]

# Flatten the flag lookup once, instead of re-walking the nested dicts and
# re-formatting every "column:label" string for each row.
flag_items = [
    (column, flag, "{}:{}".format(column, label))
    for column, labels_by_flag in new_mapper.items()
    for flag, label in labels_by_flag.items()
]

for i, row in fr2.iterrows():
    # A bit-flag cell contributes one item per bit that is set.
    curr = [
        item
        for column, flag, item in flag_items
        if (row[column] & flag) > 0
    ]

    if row['balloon_payment'] == 1:
        curr.append('balloon')

    if row['interest_only_payment'] == 1:
        curr.append('interest only')

    # NOTE(review): the item says "rooms" but the value is total_units; the
    # wording is kept because the mined rules are reported with this label
    # (e.g. "1 rooms").
    curr.append("{} rooms".format(row['total_units']))

    # Percentile-band items, e.g. "interest_rate:>80".
    for col in pct_cols:
        curr.append("{}:{}".format(col,row[col]))

    # curr.append(row['company'])

    # derived_sex is added as a bare value, without a column prefix.
    curr.append(row['derived_sex'])

    curr.append("age_category:{}".format(row['applicant_age']))

    basket.append(curr)
# Every distinct item that occurs in any transaction.
items = {item for trans in basket for item in trans}

# One dict per transaction holding a boolean for every known item: items the
# transaction lacks are entered first (False), then the ones it has (True).
result = []
for record in basket:
    rowset = set(record)
    labels = dict.fromkeys(items - rowset, False)
    labels.update(dict.fromkeys(items.intersection(rowset), True))
    result.append(labels)

# Boolean one-hot frame: one row per transaction, one column per item.
ohe_df = pd.DataFrame(result)

# Reshape the boolean one-hot frame into "single" basket format: one row per
# (transaction index, item) pair that is actually present.
single_basket = ohe_df.reset_index().melt(id_vars='index')

# Keep only the pairs whose flag is True.  Selecting them directly avoids
# depending on replace([0], [np.nan]) treating boolean False as the integer 0,
# which pandas does not guarantee across versions; if that replacement is
# skipped, dropna() removes nothing and every item would be exported for every
# transaction.
single_basket = single_basket.loc[single_basket['value'].eq(True)]

# Group the rows of each transaction together in the exported file.
single_basket = single_basket.sort_values(by='index')

# Columns written: index (transaction id), variable (item), value (always True).
single_basket.to_csv('../data/ARM_single_basket.csv',index=False)

C.2.1 Construct Frequent Items

# Frequent itemsets over the one-hot frame; use_colnames=True reports the
# itemsets by column (item) name rather than by column position.
freq_items = apriori(
    ohe_df,
    min_support=0.05,
    use_colnames=True,
    verbose=1,
)

Processing 3306 combinations | Sampling itemset size 2
Processing 25998 combinations | Sampling itemset size 3
Processing 31860 combinations | Sampling itemset size 4
Processing 29945 combinations | Sampling itemset size 5
Processing 12594 combinations | Sampling itemset size 6
Processing 1253 combinations | Sampling itemset size 7
# Derive rules from the frequent itemsets, keeping those whose support reaches
# the 0.5 threshold, then rank them from highest to lowest lift.
ars = association_rules(
    freq_items,
    metric='support',
    min_threshold=0.5,
)
ars.sort_values(by='lift', ascending=False, inplace=True)

C.2.2 Construct Association Rules (Python)

C.2.3 Save the Top 15 Association Rules by Category

C.2.4 Performing Rule Mining and Visualization in R

Rule mining is performed in R on the data that was already transformed in Python in the previous steps.

library(tidyverse)
library(arules)
library(arulesViz)

First load the data into R for use in rule mining.

# Read the basket file as arules transactions.  With format='single' every
# line is one (transaction id, item) pair; cols=c(1,2) takes the id from the
# first field and the item from the second.  rm.duplicates=TRUE drops repeated
# items within a transaction.
# NOTE(review): the path is absolute and machine-specific, and the file name
# differs from '../data/ARM_single_basket.csv' written by the Python step --
# confirm that this is the same export.
# NOTE(review): if the CSV starts with a header row (pandas to_csv writes one
# by default), it is presumably read as an ordinary transaction here because
# no header/skip argument is given -- confirm.
dat <- read.transactions(
    'C:/Users/pconn/OneDrive/Desktop/Machine Learning/ML/data/single_basket.csv',
    sep=',',
    rm.duplicates=TRUE,
    format='single',
    cols=c(1,2)
)

Now that the data is loaded, R is used to perform the rule mining.

# Seed the random number generator so that any randomised step later in the
# session is repeatable.  set.seed must be *called*: the earlier form
# `set.seed = 9001` only created a numeric variable named set.seed and left
# the generator unseeded.
set.seed(9001)

# Mine rules over all transactions: at least two items per rule, support of
# 0.04 or more and confidence of 0.01 or more; progress output is silenced.
a_rules <- arules::apriori(
    data = dat,
    parameter = list(
        minlen = 2,
        confidence = 0.01,
        support = 0.04
    ),
    control = list(verbose = FALSE)
)

The mined rules can now be printed and/or visualized.

# Show the 15 rules with the highest support.  head() returns however many
# rules exist when there are fewer than 15, whereas indexing with [1:15]
# assumes that at least 15 rules were mined.
sorted_arules <- sort(a_rules, by = 'support', decreasing = TRUE)
arules::inspect(head(sorted_arules, 15))
     lhs                                           rhs                                         support confidence  coverage     lift  count
[1]  {approve}                                  => {1 rooms}                                 0.8465587  0.9897986 0.8552837 1.001607 172124
[2]  {1 rooms}                                  => {approve}                                 0.8465587  0.8566580 0.9882108 1.001607 172124
[3]  {applicant_ethnicity:Not Hispanic/Latino}  => {1 rooms}                                 0.7152005  0.9893591 0.7228928 1.001162 145416
[4]  {1 rooms}                                  => {applicant_ethnicity:Not Hispanic/Latino} 0.7152005  0.7237327 0.9882108 1.001162 145416
[5]  {applicant_ethnicity:Not Hispanic/Latino}  => {approve}                                 0.6253676  0.8650905 0.7228928 1.011466 127151
[6]  {approve}                                  => {applicant_ethnicity:Not Hispanic/Latino} 0.6253676  0.7311815 0.8552837 1.011466 127151
[7]  {applicant_ethnicity:Not Hispanic/Latino,                                                                                             
      approve}                                  => {1 rooms}                                 0.6195837  0.9907512 0.6253676 1.002571 125975
[8]  {1 rooms,                                                                                                                             
      applicant_ethnicity:Not Hispanic/Latino}  => {approve}                                 0.6195837  0.8663077 0.7152005 1.012889 125975
[9]  {1 rooms,                                                                                                                             
      approve}                                  => {applicant_ethnicity:Not Hispanic/Latino} 0.6195837  0.7318852 0.8465587 1.012439 125975
[10] {applicant_race:White}                     => {1 rooms}                                 0.6052911  0.9917401 0.6103324 1.003571 123069
[11] {1 rooms}                                  => {applicant_race:White}                    0.6052911  0.6125121 0.9882108 1.003571 123069
[12] {aus:Desktop Underwriter}                  => {1 rooms}                                 0.5645626  0.9910383 0.5696678 1.002861 114788
[13] {1 rooms}                                  => {aus:Desktop Underwriter}                 0.5645626  0.5712977 0.9882108 1.002861 114788
[14] {applicant_race:White}                     => {approve}                                 0.5349397  0.8764727 0.6103324 1.024774 108765
[15] {approve}                                  => {applicant_race:White}                    0.5349397  0.6254529 0.8552837 1.024774 108765
# Show the 15 rules with the highest confidence.  head() returns however many
# rules exist when there are fewer than 15, whereas indexing with [1:15]
# assumes that at least 15 rules were mined.
sorted_arules <- sort(a_rules, by = 'confidence', decreasing = TRUE)
arules::inspect(head(sorted_arules, 15))
     lhs                                         rhs            support confidence   coverage     lift count
[1]  {interest_rate:>80}                      => {approve}   0.18578904          1 0.18578904 1.169203 37775
[2]  {aus:Loan Prospector/Product Advisor,                                                                  
      aus:Other}                              => {JP Morgan} 0.15580213          1 0.15580213 4.971320 31678
[3]  {interest_rate:>80,                                                                                    
      loan_to_value_ratio:61-80}              => {approve}   0.04241056          1 0.04241056 1.169203  8623
[4]  {interest_rate:>80,                                                                                    
      tract_minority_population_percent:0-20} => {approve}   0.04197283          1 0.04197283 1.169203  8534
[5]  {debt_to_income_ratio:61-80,                                                                           
      interest_rate:>80}                      => {approve}   0.04571074          1 0.04571074 1.169203  9294
[6]  {interest_rate:>80,                                                                                    
      tract_to_msa_income_percentage:21-40}   => {approve}   0.04036454          1 0.04036454 1.169203  8207
[7]  {debt_to_income_ratio:21-40,                                                                           
      interest_rate:>80}                      => {approve}   0.04220891          1 0.04220891 1.169203  8582
[8]  {Female,                                                                                               
      interest_rate:>80}                      => {approve}   0.04209579          1 0.04209579 1.169203  8559
[9]  {2.0,                                                                                                  
      interest_rate:>80}                      => {approve}   0.04509104          1 0.04509104 1.169203  9168
[10] {1.0,                                                                                                  
      interest_rate:>80}                      => {approve}   0.05827702          1 0.05827702 1.169203 11849
[11] {interest_rate:>80,                                                                                    
      loan_to_value_ratio:21-40}              => {approve}   0.05279802          1 0.05279802 1.169203 10735
[12] {interest_rate:>80,                                                                                    
      Joint}                                  => {approve}   0.06351993          1 0.06351993 1.169203 12915
[13] {interest_rate:>80,                                                                                    
      Male}                                   => {approve}   0.06214773          1 0.06214773 1.169203 12636
[14] {aus:Loan Prospector/Product Advisor,                                                                  
      interest_rate:>80}                      => {approve}   0.08140290          1 0.08140290 1.169203 16551
[15] {interest_rate:>80,                                                                                    
      Rocket Mortgage}                        => {approve}   0.10214832          1 0.10214832 1.169203 20769
# Show the 15 rules with the highest lift.  head() returns however many rules
# exist when there are fewer than 15, whereas indexing with [1:15] assumes
# that at least 15 rules were mined.
sorted_arules <- sort(a_rules, by = 'lift', decreasing = TRUE)
arules::inspect(head(sorted_arules, 15))
     lhs                                                rhs                                      support confidence   coverage     lift count
[1]  {applicant_ethnicity:Mexican}                   => {applicant_ethnicity:Hispanic/Latino} 0.04056128  0.9169446 0.04423525 8.207934  8247
[2]  {applicant_ethnicity:Hispanic/Latino}           => {applicant_ethnicity:Mexican}         0.04056128  0.3630800 0.11171442 8.207934  8247
[3]  {1 rooms,                                                                                                                               
      applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.07532387  0.7362273 0.10231062 7.834365 15315
[4]  {applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.07603703  0.7353151 0.10340740 7.824658 15460
[5]  {1 rooms,                                                                                                                               
      applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      approve,                                                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.06595941  0.7333224 0.08994600 7.803453 13411
[6]  {applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      approve,                                                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.06651026  0.7322395 0.09083129 7.791930 13523
[7]  {1 rooms,                                                                                                                               
      applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      aus:Desktop Underwriter,                                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.05100776  0.7264131 0.07021867 7.729930 10371
[8]  {applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      aus:Desktop Underwriter,                                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.05134712  0.7252518 0.07079903 7.717572 10440
[9]  {1 rooms,                                                                                                                               
      applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      approve,                                                                                                                               
      aus:Desktop Underwriter,                                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.04512547  0.7246663 0.06227068 7.711341  9175
[10] {applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      approve,                                                                                                                               
      aus:Desktop Underwriter,                                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.04538122  0.7233459 0.06273792 7.697291  9227
[11] {1 rooms,                                                                                                                               
      applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      approve,                                                                                                                               
      aus:Desktop Underwriter}                       => {Sex Not Available}                   0.04939456  0.6718625 0.07351885 7.149444 10043
[12] {1 rooms,                                                                                                                               
      applicant_ethnicity:Information Not Provided,                                                                                          
      Rocket Mortgage}                               => {Sex Not Available}                   0.07602227  0.6709350 0.11330795 7.139574 15457
[13] {applicant_ethnicity:Information Not Provided,                                                                                          
      applicant_race:Information not provided,                                                                                               
      approve,                                                                                                                               
      aus:Desktop Underwriter}                       => {Sex Not Available}                   0.04969457  0.6703377 0.07413364 7.133218 10104
[14] {applicant_ethnicity:Information Not Provided,                                                                                          
      Rocket Mortgage}                               => {Sex Not Available}                   0.07674034  0.6700880 0.11452278 7.130562 15603
[15] {1 rooms,                                                                                                                               
      applicant_ethnicity:Information Not Provided,                                                                                          
      approve,                                                                                                                               
      Rocket Mortgage}                               => {Sex Not Available}                   0.06656437  0.6685768 0.09956129 7.114480 13534
# Draw the ten highest-lift rules as an interactive (HTML) graph.
sub <- head(
    sort(a_rules, decreasing = TRUE, by = 'lift'),
    n = 10
)
plot(sub, engine = "html", method = "graph")

C.2.5 Examining Organization Specific Rules

# Split the transactions by lender: each subset keeps the transactions that
# contain the lender's name as one of their items.
NFCU <- subset(dat, items %in% 'Navy Federal Credit Union')
JPM  <- subset(dat, items %in% 'JP Morgan')
BOA  <- subset(dat, items %in% 'Bank of America')
WF   <- subset(dat, items %in% 'Wells Fargo')
RM   <- subset(dat, items %in% 'Rocket Mortgage')

# Run apriori quietly on `trns` and return the mined rules.
#   trns   transactions to mine
#   appear appearance list restricting where items may occur in a rule
#   sup    minimum support
#   conf   minimum confidence
#   len    minimum number of items in a rule
get_rules <- function(trns, appear, sup, conf, len) {
    mining_params <- list(support = sup, confidence = conf, minlen = len)
    arules::apriori(
        data = trns,
        parameter = mining_params,
        appearance = appear,
        control = list(verbose = FALSE)
    )
}

# Per-lender rule sets whose right-hand side is fixed to a single outcome
# ('approve' or 'deny').  Arguments are passed in get_rules' order:
# transactions, appearance, support, confidence, minimum rule length.
NFCU_app <- get_rules(NFCU, list(default = 'lhs', rhs = 'approve'), 0.04, 0.01, 3)
NFCU_den <- get_rules(NFCU, list(default = 'lhs', rhs = 'deny'), 0.04, 0.01, 3)

JPM_app <- get_rules(JPM, list(default = 'lhs', rhs = 'approve'), 0.04, 0.01, 3)
JPM_den <- get_rules(JPM, list(default = 'lhs', rhs = 'deny'), 0.04, 0.01, 3)

BOA_app <- get_rules(BOA, list(default = 'lhs', rhs = 'approve'), 0.04, 0.01, 3)
BOA_den <- get_rules(BOA, list(default = 'lhs', rhs = 'deny'), 0.04, 0.01, 3)

WF_app <- get_rules(WF, list(default = 'lhs', rhs = 'approve'), 0.04, 0.01, 3)
WF_den <- get_rules(WF, list(default = 'lhs', rhs = 'deny'), 0.04, 0.01, 3)

RM_app <- get_rules(RM, list(default = 'lhs', rhs = 'approve'), 0.04, 0.01, 3)
RM_den <- get_rules(RM, list(default = 'lhs', rhs = 'deny'), 0.04, 0.01, 3)
# Graph the highest-lift denial rules of each lender (at most ten per lender).
# head() is used instead of [1:10] so that a lender with fewer than ten
# denial rules is still plotted with the rules it has.
plot(head(sort(NFCU_den, by = 'lift'), 10), engine = 'html', method = 'graph')
plot(head(sort(JPM_den, by = 'lift'), 10), engine = 'html', method = 'graph')
plot(head(sort(BOA_den, by = 'lift'), 10), engine = 'html', method = 'graph')
plot(head(sort(WF_den, by = 'lift'), 10), engine = 'html', method = 'graph')
plot(head(sort(RM_den, by = 'lift'), 10), engine = 'html', method = 'graph')
# Rules over all transactions whose right-hand side is fixed to 'deny'.
a_rules_den <- arules::apriori(
    data = dat,
    parameter = list(minlen = 2, confidence = 0.01, support = 0.04),
    appearance = list(default = 'lhs', rhs = 'deny'),
    control = list(verbose = FALSE)
)

# Same mining settings, with the right-hand side fixed to 'approve'.
a_rules_app <- arules::apriori(
    data = dat,
    parameter = list(minlen = 2, confidence = 0.01, support = 0.04),
    appearance = list(default = 'lhs', rhs = 'approve'),
    control = list(verbose = FALSE)
)
# Denial rules whose left-hand side has an item partially matching 'race:',
# listed by lift.
inspect(
    sort(subset(a_rules_den, lhs %pin% 'race:'), by = 'lift')
)
    lhs                                           rhs       support confidence  coverage      lift count
[1] {applicant_race:White,                                                                              
     interest_rate:41-60}                      => {deny} 0.04213514  0.3407446 0.1236561 2.3546500  8567
[2] {1 rooms,                                                                                           
     applicant_race:White,                                                                              
     interest_rate:41-60}                      => {deny} 0.04138263  0.3381153 0.1223921 2.3364812  8414
[3] {applicant_race:White}                     => {deny} 0.07539273  0.1235273 0.6103324 0.8536119 15329
[4] {1 rooms,                                                                                           
     applicant_race:White}                     => {deny} 0.07414348  0.1224923 0.6052911 0.8464593 15075
[5] {applicant_race:White,                                                                              
     aus:Desktop Underwriter}                  => {deny} 0.04111213  0.1185674 0.3467406 0.8193371  8359
[6] {1 rooms,                                                                                           
     applicant_race:White,                                                                              
     aus:Desktop Underwriter}                  => {deny} 0.04052193  0.1176227 0.3445077 0.8128092  8239
[7] {applicant_ethnicity:Not Hispanic/Latino,                                                           
     applicant_race:White}                     => {deny} 0.05616707  0.1111652 0.5052577 0.7681857 11420
[8] {1 rooms,                                                                                           
     applicant_ethnicity:Not Hispanic/Latino,                                                           
     applicant_race:White}                     => {deny} 0.05537030  0.1103444 0.5017952 0.7625140 11258
# Approval rules whose left-hand side has an item partially matching 'race:',
# listed by lift and limited to ten.  head() is used instead of [1:10] so the
# call still works when fewer than ten rules match.
inspect(head(
    sort(
        subset(
            a_rules_app, lhs %pin% 'race:'
        ), by = 'lift'
    ), 10
))
     lhs                                           rhs          support confidence   coverage     lift count
[1]  {applicant_race:White,                                                                                 
      interest_rate:>80}                        => {approve} 0.11876236          1 0.11876236 1.169203 24147
[2]  {applicant_race:White,                                                                                 
      interest_rate:>80,                                                                                    
      Joint}                                    => {approve} 0.04705836          1 0.04705836 1.169203  9568
[3]  {applicant_race:White,                                                                                 
      interest_rate:>80,                                                                                    
      Male}                                     => {approve} 0.04324667          1 0.04324667 1.169203  8793
[4]  {applicant_race:White,                                                                                 
      aus:Loan Prospector/Product Advisor,                                                                  
      interest_rate:>80}                        => {approve} 0.05488339          1 0.05488339 1.169203 11159
[5]  {applicant_race:White,                                                                                 
      interest_rate:>80,                                                                                    
      Rocket Mortgage}                          => {approve} 0.06145424          1 0.06145424 1.169203 12495
[6]  {applicant_race:White,                                                                                 
      aus:Desktop Underwriter,                                                                              
      interest_rate:>80}                        => {approve} 0.08051760          1 0.08051760 1.169203 16371
[7]  {applicant_ethnicity:Not Hispanic/Latino,                                                              
      applicant_race:White,                                                                                 
      interest_rate:>80}                        => {approve} 0.09885305          1 0.09885305 1.169203 20099
[8]  {1 rooms,                                                                                              
      applicant_race:White,                                                                                 
      interest_rate:>80}                        => {approve} 0.11780329          1 0.11780329 1.169203 23952
[9]  {applicant_race:White,                                                                                 
      interest_rate:0-20,                                                                                   
      Rocket Mortgage}                          => {approve} 0.05066840          1 0.05066840 1.169203 10302
[10] {1 rooms,                                                                                              
      applicant_race:White,                                                                                 
      interest_rate:>80,                                                                                    
      Joint}                                    => {approve} 0.04677802          1 0.04677802 1.169203  9511