import numpy as np
import pandas as pd
import wrds
import matplotlib.pyplot as plt
import seaborn as sns
pd.options.display.max_columns=200
conn=wrds.Connection()
WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully. Loading library list... Done
conn.list_tables(library='audit')
['accfiler', 'auditcblock', 'auditchange', 'auditfees', 'auditfeesr', 'auditlegal', 'auditnonreli', 'auditopin', 'auditordur', 'auditors', 'auditorsinfo', 'auditsox302', 'auditsox404', 'bankrupt', 'bdfrep', 'benefit', 'commlett', 'commlettconv', 'commlettthreads', 'currengbdfrep', 'currengnpsaud', 'diroffichange', 'f01audaff', 'f01audeve', 'f01audevetoaud', 'f01audevetyp', 'f01audloc', 'f01secloc', 'f05audopitogoiconiss', 'f05goiconiss', 'f06forapdiv', 'f06forapfil', 'f06forapnotdiv', 'f18meracq', 'f18meracqfil', 'f18meracqtratyp', 'f27aud', 'f27corpos', 'f27docdet', 'f27docdetfil', 'f27per', 'f27perfil', 'f27perpos', 'f27secloc', 'f28aud', 'f28foucod', 'f28ntecod', 'f28ntecomcod', 'f28subclacod', 'f29aud', 'f29corpos', 'f29docdet', 'f29docdetfil', 'f29per', 'f29perfil', 'f29perpos', 'f29secloc', 'f30aud', 'f30foucod', 'f30ntecod', 'f30ntecomcod', 'f30subclacod', 'f34audopitogoiconiss', 'f34forapdiv', 'f34forapfil', 'f34forapnotdiv', 'f34goiconiss', 'f35riaadvsadirown', 'f35riaadvsbindown', 'f35riaadvsd10conper', 'f35riaadvsd1bothnam', 'f35riaadvsd1fothoff', 'f35riaadvsd1iwwwadd', 'f35riaadvsd1kboorec', 'f35riaadvsd1lforreg', 'f35riaadvsd2a1secexeord', 'f35riaadvsd2a7affadv', 'f35riaadvsd2a8newforadv', 'f35riaadvsd2a9muladv', 'f35riaadvsd4suc', 'f35riaadvsd7aaffadvdea', 'f35riaadvsd7blimpar', 'f35riaadvsd9cacc', 'f35riaadvsdmis', 'f35riaforurltoliv', 'f35riasd5g3pubfun', 'f35riasd7badm', 'f35riasd7baud', 'f35riasd7bcus', 'f35riasd7bfeefun', 'f35riasd7bmar', 'f35riasd7bpribro', 'f35riasd7bprifun', 'f35riasd9acctoaud', 'f36riaadvsadirown', 'f36riaadvsbindown', 'f36riaadvsd10conper', 'f36riaadvsd1bothnam', 'f36riaadvsd1fothoff', 'f36riaadvsd1iwwwadd', 'f36riaadvsd1kboorec', 'f36riaadvsd1lforreg', 'f36riaadvsd2a1secexeord', 'f36riaadvsd2a7affadv', 'f36riaadvsd2a8newforadv', 'f36riaadvsd2a9muladv', 'f36riaadvsd4suc', 'f36riaadvsd7aaffadvdea', 'f36riaadvsd7blimpar', 'f36riaadvsd9cacc', 'f36riaadvsdmis', 'f36riaforurltoliv', 'f36riasd5g3pubfun', 'f36riasd7badm', 'f36riasd7baud', 'f36riasd7bcus', 'f36riasd7bfeefun', 'f36riasd7bmar', 'f36riasd7bpribro', 'f36riasd7bprifun', 'f36riasd9acctoaud', 'f37fordent', 'f37fordper', 'f37fordrec', 'f37fordsig', 'f38fordent', 'f38fordper', 'f38fordrec', 'f38fordsig', 'f40comrespaitocomthr', 'f43adm', 'f43bro', 'f43funtoaud', 'f54sedaudchaiss', 'f54sedaudchasub', 'f54sedaudchasubfil', 'f54sedaudchatax', 'f56accestchafil', 'f56accesttax', 'f56accesttotax', 'f60aud', 'f60audaff', 'f60sedaudopi', 'f72intaudopiaud', 'f72intaudopipar', 'f74aqrcomfla', 'f74feesupaccestcha', 'f74feesupaltsco', 'f74feesupaudcha', 'f74feesupaudfee', 'f74feesupaudfeeout', 'f74feesupaudrat', 'f74feesupbenlaw', 'f74feesupbensco', 'f74feesupceocha', 'f74feesupcfocha', 'f74feesupcivriglit', 'f74feesupcyb', 'f74feesupdiscom', 'f74feesupdiscon', 'f74feesupemplablit', 'f74feesupengparcha', 'f74feesupenvlit', 'f74feesupfinres', 'f74feesupgoicon', 'f74feesupillactlit', 'f74feesupintcon', 'f74feesupintprolit', 'f74feesuplatfil', 'f74feesupmatimp', 'f74feesupnonaudfee', 'f74feesupperadj', 'f74feesupplesec', 'f74feesupreglit', 'f74feesupshaact', 'f74feesupshaactlit', 'f78feesupcamtop', 'f79intresfil', 'f79intresisstax', 'f79intresisstocat', 'f79intrestoiss', 'f79intrestyp', 'f85cybbreatt', 'f85cybbreinf', 'f85cybbretoatt', 'f85cybbretoinf', 'f85cybfil', 'f85cybtar', 'f85cybtarrel', 'f89insdat', 'f89isstaxtogro', 'f89offloc', 'f89tax', 'f89taxgro', 'feed01del', 'feed02del', 'feed03del', 'feed04del', 'feed05del', 'feed06del', 'feed09begend', 'feed09cat', 'feed09del', 'feed09filing', 'feed09period', 'feed09tocat', 'feed10dcqua', 'feed10dcrea', 'feed10del', 'feed10qua', 'feed10rea', 'feed11del', 'feed11opqua', 'feed11oprea', 'feed11qua', 'feed11rea', 'feed13', 'feed13b', 'feed13case', 'feed13cat', 'feed13del', 'feed13web', 'feed14', 'feed14b', 'feed14case', 'feed14del', 'feed14gov', 'feed14law', 'feed14party', 'feed14ptof', 'feed16del', 'feed17change', 'feed17del', 'feed18_merger_acquisition', 'feed18del', 'feed19del', 'feed20del', 'feed20notification', 'feed20reason', 'feed21del', 'feed25aud', 'feed25clc', 'feed25clcl', 'feed25cldef', 'feed25clfc', 'feed25clissut', 'feed25clissutaxo', 'feed25clissutaxog', 'feed25clrft', 'feed25clrit', 'feed25cnsr', 'feed25corpposi', 'feed25lettref', 'feed25litgov', 'feed25litlawfirm', 'feed25person', 'feed25personemp', 'feed25personfil', 'feed25personposi', 'feed26aud', 'feed26clc', 'feed26clcl', 'feed26cldef', 'feed26clissut', 'feed26clissutaxo', 'feed26clissutaxog', 'feed26cnsr', 'feed26corpposi', 'feed26lettref', 'feed26litgov', 'feed26litlawfirm', 'feed26person', 'feed26personemp', 'feed26personfil', 'feed26personposi', 'feed31ashr', 'feed31ashtr', 'feed32del', 'feed34del', 'feed35del', 'feed36del', 'feed37del', 'feed38del', 'feed41_transfer_agents', 'feed49_sedar_audit_fee', 'feed50_sedar_audit_orig_fee', 'feed54_sedar_auditor_changes', 'feed55_auditor_ratification', 'feed56_acct_estimates_change', 'feed60_sedar_restatement', 'feed65_impairments', 'feed68_sedar_cblock', 'feed70_europe_cblock', 'feed71_eu_audit_fee', 'feed72_eu_audit_opinion', 'feed73_eu_auditor_engagements', 'feed74_aqrm', 'feed75_eu_auditor_change', 'feed76_eu_transparency_report', 'feed77_eu_key_audit_matter', 'feed78_critical_audit_matter', 'feed79_eu_restatements', 'feed85_cybersecurity', 'feed86_audit_firm_events', 'feed89_pcaob_report', 'formd', 'formdmro', 'ipo', 'names_europe', 'names_sedar', 'namesauditorsinfo', 'namescurrengnpsaud', 'namesnpsaud', 'namesprivatefund', 'namesria', 'namesriacurreng', 'npsaud', 'nt', 'privatefund', 'revauditopin', 'ria', 'riacurreng', 'sholderact', 'taxfootnt']
vars_bankrupt= conn.describe_table(library='audit', table='bankrupt')['name']
print(len(vars_bankrupt))
Approximately 2963 rows in audit.bankrupt. 217
for idx, var in enumerate(vars_bankrupt):
print(idx, var)
0 bank_key 1 subsid_name 2 bankruptcy_type 3 law_court_fkey 4 court_type_code 5 court_name 6 court_s_name 7 court_loc_street 8 court_loc_city 9 court_loc_state 10 court_loc_zip 11 court_loc 12 court_url 13 law_court_state_name 14 law_court_country 15 law_court_reg 16 law_court_con_super_reg 17 bank_begin_date 18 bank_end_date 19 ftp_file_fkey 20 form_fkey 21 file_date 22 file_accepted 23 file_size 24 http_name_html 25 http_name_text 26 company_fkey 27 best_edgar_ticker 28 matchqu_price_date 29 matchqu_price_close 30 matchqu_tso_date 31 matchqu_tso 32 matchqu_tso_markcap 33 matchqu_date_qtr 34 matchqu_date_ttm 35 matchqu_filing_code_qtr 36 matchqu_filing_code_ttm 37 matchqu_annual_quindic 38 matchqu_balsh_book_val 39 matchqu_balsh_assets 40 matchqu_balsh_cash_equivs 41 matchqu_incmst_rev_qtr 42 matchqu_incmst_rev_ttm 43 matchqu_incmst_netinc_qtr 44 matchqu_incmst_netinc_ttm 45 matchqu_incmst_extraitm_qtr 46 matchqu_incmst_extraitm_ttm 47 matchqu_incmst_ebitda_qtr 48 matchqu_incmst_ebitda_ttm 49 matchqu_eff_accchange_qtr 50 matchqu_eff_accchange_ttm 51 matchqu_cshflst_op_act_qtr 52 matchqu_cshflst_op_act_ttm 53 matchqu_cshflst_inv_act_qtr 54 matchqu_cshflst_inv_act_ttm 55 matchqu_cshflst_fin_act_qtr 56 matchqu_cshflst_fin_act_ttm 57 matchqu_cshflst_change_qtr 58 matchqu_cshflst_change_ttm 59 priorqu_price_date 60 priorqu_price_close 61 priorqu_tso_date 62 priorqu_tso 63 priorqu_tso_markcap 64 priorqu_date_qtr 65 priorqu_date_ttm 66 priorqu_filing_code_qtr 67 priorqu_filing_code_ttm 68 priorqu_annual_quindic 69 priorqu_balsh_book_val 70 priorqu_balsh_assets 71 priorqu_balsh_cash_equivs 72 priorqu_incmst_rev_qtr 73 priorqu_incmst_rev_ttm 74 priorqu_incmst_netinc_qtr 75 priorqu_incmst_netinc_ttm 76 priorqu_incmst_extraitm_qtr 77 priorqu_incmst_extraitm_ttm 78 priorqu_incmst_ebitda_qtr 79 priorqu_incmst_ebitda_ttm 80 priorqu_eff_accchange_qtr 81 priorqu_eff_accchange_ttm 82 priorqu_cshflst_op_act_qtr 83 priorqu_cshflst_op_act_ttm 84 priorqu_cshflst_inv_act_qtr 85 priorqu_cshflst_inv_act_ttm 86 priorqu_cshflst_fin_act_qtr 87 priorqu_cshflst_fin_act_ttm 88 priorqu_cshflst_change_qtr 89 priorqu_cshflst_change_ttm 90 matchfy_price_date 91 matchfy_price_close 92 matchfy_tso_date 93 matchfy_tso 94 matchfy_tso_markcap 95 matchfy_date_qtr 96 matchfy_date_ttm 97 matchfy_filing_code_qtr 98 matchfy_filing_code_ttm 99 matchfy_annual_quindic 100 matchfy_balsh_book_val 101 matchfy_balsh_assets 102 matchfy_balsh_cash_equivs 103 matchfy_incmst_rev_qtr 104 matchfy_incmst_rev_ttm 105 matchfy_incmst_netinc_qtr 106 matchfy_incmst_netinc_ttm 107 matchfy_incmst_extraitm_qtr 108 matchfy_incmst_extraitm_ttm 109 matchfy_incmst_ebitda_qtr 110 matchfy_incmst_ebitda_ttm 111 matchfy_eff_accchange_qtr 112 matchfy_eff_accchange_ttm 113 matchfy_cshflst_op_act_qtr 114 matchfy_cshflst_op_act_ttm 115 matchfy_cshflst_inv_act_qtr 116 matchfy_cshflst_inv_act_ttm 117 matchfy_cshflst_fin_act_qtr 118 matchfy_cshflst_fin_act_ttm 119 matchfy_cshflst_change_qtr 120 matchfy_cshflst_change_ttm 121 priorfy_price_date 122 priorfy_price_close 123 priorfy_tso_date 124 priorfy_tso 125 priorfy_tso_markcap 126 priorfy_date_qtr 127 priorfy_date_ttm 128 priorfy_filing_code_qtr 129 priorfy_filing_code_ttm 130 priorfy_annual_quindic 131 priorfy_balsh_book_val 132 priorfy_balsh_assets 133 priorfy_balsh_cash_equivs 134 priorfy_incmst_rev_qtr 135 priorfy_incmst_rev_ttm 136 priorfy_incmst_netinc_qtr 137 priorfy_incmst_netinc_ttm 138 priorfy_incmst_extraitm_qtr 139 priorfy_incmst_extraitm_ttm 140 priorfy_incmst_ebitda_qtr 141 priorfy_incmst_ebitda_ttm 142 priorfy_eff_accchange_qtr 143 priorfy_eff_accchange_ttm 144 priorfy_cshflst_op_act_qtr 145 priorfy_cshflst_op_act_ttm 146 priorfy_cshflst_inv_act_qtr 147 priorfy_cshflst_inv_act_ttm 148 priorfy_cshflst_fin_act_qtr 149 priorfy_cshflst_fin_act_ttm 150 priorfy_cshflst_change_qtr 151 priorfy_cshflst_change_ttm 152 closestqu_price_date 153 closestqu_price_close 154 closestqu_tso_date 155 closestqu_tso 156 closestqu_tso_markcap 157 closestqu_date_qtr 158 closestqu_date_ttm 159 closestqu_filing_code_qtr 160 closestqu_filing_code_ttm 161 closestqu_annual_quindic 162 closestqu_balsh_book_val 163 closestqu_balsh_assets 164 closestqu_balsh_cash_equivs 165 closestqu_incmst_rev_qtr 166 closestqu_incmst_rev_ttm 167 closestqu_incmst_netinc_qtr 168 closestqu_incmst_netinc_ttm 169 closestqu_incmst_extraitm_qtr 170 closestqu_incmst_extraitm_ttm 171 closestqu_incmst_ebitda_qtr 172 closestqu_incmst_ebitda_ttm 173 closestqu_eff_accchange_qtr 174 closestqu_eff_accchange_ttm 175 closestqu_cshflst_op_act_qtr 176 closestqu_cshflst_op_act_ttm 177 closestqu_cshflst_inv_act_qtr 178 closestqu_cshflst_inv_act_ttm 179 closestqu_cshflst_fin_act_qtr 180 closestqu_cshflst_fin_act_ttm 181 closestqu_cshflst_change_qtr 182 closestqu_cshflst_change_ttm 183 matchfy_sumfees_fy_end 184 matchfy_sumfees_fy 185 matchfy_sum_audfees 186 matchfy_sum_nonaud 187 matchfy_sum_benfees 188 matchfy_sum_itfees 189 matchfy_sum_taxfees 190 matchfy_sum_audrel_fees 191 matchfy_sum_other 192 matchfy_sum_total 193 priorfy_sumfees_fy_end 194 priorfy_sumfees_fy 195 priorfy_sum_audfees 196 priorfy_sum_nonaud 197 priorfy_sum_benfees 198 priorfy_sum_itfees 199 priorfy_sum_taxfees 200 priorfy_sum_audrel_fees 201 priorfy_sum_other 202 priorfy_sum_total 203 closestfy_sumevent_key 204 closestfy_sum_comp_key 205 closestfy_feesfye 206 closestfy_sumfees_fy 207 closestfy_sum_audfees 208 closestfy_sum_nonaud 209 closestfy_sum_benfees 210 closestfy_sum_itfees 211 closestfy_sum_taxfees 212 closestfy_sum_audrel_fees 213 closestfy_sum_other 214 closestfy_sum_total 215 eventdate_aud_fkey 216 eventdate_aud_name
vars_to_skip= ['matchfy', 'matchqu', 'closestfy', 'closestqu', 'priorqu', 'priorfy', 'note', 'opinion_text']
vars_to_keep=[]
for idx, var in enumerate(vars_bankrupt):
if any(var_to_skip in var for var_to_skip in vars_to_skip):
pass
else:
print(idx, var)
vars_to_keep.append(var)
0 bank_key 1 subsid_name 2 bankruptcy_type 3 law_court_fkey 4 court_type_code 5 court_name 6 court_s_name 7 court_loc_street 8 court_loc_city 9 court_loc_state 10 court_loc_zip 11 court_loc 12 court_url 13 law_court_state_name 14 law_court_country 15 law_court_reg 16 law_court_con_super_reg 17 bank_begin_date 18 bank_end_date 19 ftp_file_fkey 20 form_fkey 21 file_date 22 file_accepted 23 file_size 24 http_name_html 25 http_name_text 26 company_fkey 27 best_edgar_ticker 215 eventdate_aud_fkey 216 eventdate_aud_name
vars_to_keep= ['bank_key', 'bankruptcy_type', 'company_fkey', 'best_edgar_ticker', 'bank_begin_date', 'bank_end_date', 'form_fkey', 'file_date', 'eventdate_aud_fkey', 'eventdate_aud_name']
df_test= conn.get_table(library= 'audit', table='bankrupt',
columns= vars_to_keep,
obs= 3)
df_test
bank_key | bankruptcy_type | company_fkey | best_edgar_ticker | bank_begin_date | bank_end_date | form_fkey | file_date | eventdate_aud_fkey | eventdate_aud_name | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2863.0 | 11.0 | 0000002034 | None | 2019-02-19 | None | 8-K | 2019-09-20 | 11761.0 | BDO USA LLP |
1 | 2740.0 | 11.0 | 0000002178 | AE | 2017-04-21 | 2018-10-19 | 8-K | 2017-04-24 | 3.0 | Deloitte & Touche LLP |
2 | 265.0 | 11.0 | 0000002852 | None | 2002-10-01 | 2013-01-09 | 8-K | 2002-10-15 | 1.0 | PricewaterhouseCoopers LLP |
df= conn.get_table(library= 'audit', table='bankrupt',
columns= vars_to_keep)
print(df.shape)
df.head()
(2963, 10)
bank_key | bankruptcy_type | company_fkey | best_edgar_ticker | bank_begin_date | bank_end_date | form_fkey | file_date | eventdate_aud_fkey | eventdate_aud_name | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2863.0 | 11.0 | 0000002034 | None | 2019-02-19 | None | 8-K | 2019-09-20 | 11761.0 | BDO USA LLP |
1 | 2740.0 | 11.0 | 0000002178 | AE | 2017-04-21 | 2018-10-19 | 8-K | 2017-04-24 | 3.0 | Deloitte & Touche LLP |
2 | 265.0 | 11.0 | 0000002852 | None | 2002-10-01 | 2013-01-09 | 8-K | 2002-10-15 | 1.0 | PricewaterhouseCoopers LLP |
3 | 2906.0 | 11.0 | 0000003116 | None | 2020-05-20 | 2021-02-19 | 8-K | 2020-05-21 | 11761.0 | BDO USA LLP |
4 | 499.0 | 11.0 | 0000003662 | None | 2001-02-06 | 2006-07-31 | 8-K | 2001-02-20 | 3.0 | Deloitte & Touche LLP |
df.rename(columns=
{'eventdate_aud_fkey':'afk',
'eventdate_aud_name':'afn',
'company_fkey':'cik',
'best_edgar_ticker':'tic'},
inplace= True)
df.head()
bank_key | bankruptcy_type | cik | tic | bank_begin_date | bank_end_date | form_fkey | file_date | afk | afn | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2863.0 | 11.0 | 0000002034 | None | 2019-02-19 | None | 8-K | 2019-09-20 | 11761.0 | BDO USA LLP |
1 | 2740.0 | 11.0 | 0000002178 | AE | 2017-04-21 | 2018-10-19 | 8-K | 2017-04-24 | 3.0 | Deloitte & Touche LLP |
2 | 265.0 | 11.0 | 0000002852 | None | 2002-10-01 | 2013-01-09 | 8-K | 2002-10-15 | 1.0 | PricewaterhouseCoopers LLP |
3 | 2906.0 | 11.0 | 0000003116 | None | 2020-05-20 | 2021-02-19 | 8-K | 2020-05-21 | 11761.0 | BDO USA LLP |
4 | 499.0 | 11.0 | 0000003662 | None | 2001-02-06 | 2006-07-31 | 8-K | 2001-02-20 | 3.0 | Deloitte & Touche LLP |
df[['bank_key', 'bankruptcy_type', 'afk']]= df[['bank_key', 'bankruptcy_type', 'afk']].astype('int')
df['big4']= 0
df.loc[df.afk <=4, 'big4']= 1
df.head()
bank_key | bankruptcy_type | cik | tic | bank_begin_date | bank_end_date | form_fkey | file_date | afk | afn | big4 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2863 | 11 | 0000002034 | None | 2019-02-19 | None | 8-K | 2019-09-20 | 11761 | BDO USA LLP | 0 |
1 | 2740 | 11 | 0000002178 | AE | 2017-04-21 | 2018-10-19 | 8-K | 2017-04-24 | 3 | Deloitte & Touche LLP | 1 |
2 | 265 | 11 | 0000002852 | None | 2002-10-01 | 2013-01-09 | 8-K | 2002-10-15 | 1 | PricewaterhouseCoopers LLP | 1 |
3 | 2906 | 11 | 0000003116 | None | 2020-05-20 | 2021-02-19 | 8-K | 2020-05-21 | 11761 | BDO USA LLP | 0 |
4 | 499 | 11 | 0000003662 | None | 2001-02-06 | 2006-07-31 | 8-K | 2001-02-20 | 3 | Deloitte & Touche LLP | 1 |
df.dtypes
bank_key int32 bankruptcy_type int32 cik object tic object bank_begin_date object bank_end_date object form_fkey object file_date object afk int32 afn object big4 int64 dtype: object
df['bank_begin_date'][0].year
2019
df['bank_begin_year']= df['bank_begin_date'].astype('datetime64').dt.year
df.head()
bank_key | bankruptcy_type | cik | tic | bank_begin_date | bank_end_date | form_fkey | file_date | afk | afn | big4 | bank_begin_year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2863 | 11 | 0000002034 | None | 2019-02-19 | None | 8-K | 2019-09-20 | 11761 | BDO USA LLP | 0 | 2019 |
1 | 2740 | 11 | 0000002178 | AE | 2017-04-21 | 2018-10-19 | 8-K | 2017-04-24 | 3 | Deloitte & Touche LLP | 1 | 2017 |
2 | 265 | 11 | 0000002852 | None | 2002-10-01 | 2013-01-09 | 8-K | 2002-10-15 | 1 | PricewaterhouseCoopers LLP | 1 | 2002 |
3 | 2906 | 11 | 0000003116 | None | 2020-05-20 | 2021-02-19 | 8-K | 2020-05-21 | 11761 | BDO USA LLP | 0 | 2020 |
4 | 499 | 11 | 0000003662 | None | 2001-02-06 | 2006-07-31 | 8-K | 2001-02-20 | 3 | Deloitte & Touche LLP | 1 | 2001 |
df.form_fkey.value_counts().sort_index()
10-K 4 10-K405 3 10-Q 5 10KSB 1 10QSB 6 6-K 16 8-K 2036 8-K/A 7 Docket 882 NT 10-Q 2 Name: form_fkey, dtype: int64
df.bankruptcy_type.value_counts().sort_index()
0 1 7 596 11 2357 15 9 Name: bankruptcy_type, dtype: int64
df=df[(df.bank_begin_year >= 2000) & (df.bank_begin_year <= 2021)]
fig= plt.figure(figsize=(15,5))
df_for_fig= df[df.bankruptcy_type.isin([7,11])]
sns.countplot(data= df_for_fig, x='bank_begin_year', hue= 'bankruptcy_type')
plt.xticks(rotation=0)
plt.title(f"Yearly distribution of bankruptcies: Chapter 7 and 11 (N= {df_for_fig.shape[0]:,})", fontsize= 30, fontname='Times New Roman')
plt.xlabel('Bankruptcy Year', fontname='Times New Roman', fontsize=18)
plt.ylabel('Count', fontname='Times New Roman', fontsize=18)
plt.grid()
plt.show()
fig.savefig('../assets/images/bankruptcy-dist.jpg')
fig= plt.figure(figsize=(15,5))
sns.countplot(data=df[df.bankruptcy_type.isin([11])], x='bank_begin_year', hue= 'big4')
plt.xticks(rotation=0)
plt.title("Yearly distribution of Ch.11 bankruptcies: Big4 and Non-big4", fontsize= 30, fontname='Times New Roman')
plt.xlabel('Bankruptcy Year', fontname='Times New Roman', fontsize=18)
plt.ylabel('Count', fontname='Times New Roman', fontsize=18)
plt.grid()
plt.show()
df.to_csv('data/aa_bankrupt.csv', index= False)
df.head()
bank_key | bankruptcy_type | cik | tic | bank_begin_date | bank_end_date | form_fkey | file_date | afk | afn | big4 | bank_begin_year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2863 | 11 | 0000002034 | None | 2019-02-19 | None | 8-K | 2019-09-20 | 11761 | BDO USA LLP | 0 | 2019 |
1 | 2740 | 11 | 0000002178 | AE | 2017-04-21 | 2018-10-19 | 8-K | 2017-04-24 | 3 | Deloitte & Touche LLP | 1 | 2017 |
2 | 265 | 11 | 0000002852 | None | 2002-10-01 | 2013-01-09 | 8-K | 2002-10-15 | 1 | PricewaterhouseCoopers LLP | 1 | 2002 |
3 | 2906 | 11 | 0000003116 | None | 2020-05-20 | 2021-02-19 | 8-K | 2020-05-21 | 11761 | BDO USA LLP | 0 | 2020 |
4 | 499 | 11 | 0000003662 | None | 2001-02-06 | 2006-07-31 | 8-K | 2001-02-20 | 3 | Deloitte & Touche LLP | 1 | 2001 |
conn.close()