# Import necessary packages
import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
# Initialize instance of H2O: h2o.init() connects this Python session to an
# H2O cluster; the status table printed below reports the cluster it is using.
h2o.init()
H2O cluster uptime: | 1 days 23 hours 13 minutes 6 seconds 497 milliseconds |
H2O cluster version: | 3.8.3.2 |
H2O cluster name: | H2O_started_from_python_avniwadhwa_lrw267 |
H2O cluster total nodes: | 1 |
H2O cluster total free memory: | 2.8 GB |
H2O cluster total cores: | 8 |
H2O cluster allowed cores: | 8 |
H2O cluster healthy: | True |
H2O Connection ip: | 127.0.0.1 |
H2O Connection port: | 54321 |
H2O Connection proxy: | None |
Python Version: | 2.7.10 |
# If possible download from the s3 link and change the path to the dataset
path = "http://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/lending-club/LoanStats3a.csv"

# Explicit column types for the import: the columns we want to munge later are
# read as strings, and last_credit_pull_d is read as a factor.
types = {
    "int_rate": "string",
    "revol_util": "string",
    "emp_length": "string",
    "earliest_cr_line": "string",
    "issue_d": "string",
    "last_credit_pull_d": "factor",
    "verification_status": "string",
}

# Task 1: Import the file and look at the frame
data = h2o.import_file(path=path, col_types=types)
data.describe()
Parse Progress: [##################################################] 100% Rows:42,538 Cols:52 Chunk compression summary:
chunk_type | chunk_name | count | count_percentage | size | size_percentage |
C0L | Constant Integers | 167 | 10.0360580 | 13.0 KB | 0.1296856 |
CBS | Bits | 22 | 1.3221154 | 5.3 KB | 0.0523596 |
CX0 | Zero Sparse Bits | 10 | 0.6009615 | 1.5 KB | 0.0146576 |
CXI | Zero Sparse Integers | 106 | 6.3701920 | 48.0 KB | 0.4775846 |
C1 | 1-Byte Integers | 40 | 2.4038462 | 51.7 KB | 0.5138888 |
C1N | 1-Byte Integers (w/o NAs) | 371 | 22.2956732 | 509.1 KB | 5.0605815 |
C2 | 2-Byte Integers | 164 | 9.8557696 | 429.1 KB | 4.2652965 |
C2S | 2-Byte Fractions | 60 | 3.6057692 | 168.3 KB | 1.6727109 |
C4 | 4-Byte Integers | 96 | 5.7692308 | 504.9 KB | 5.0183658 |
C4S | 4-Byte Fractions | 209 | 12.5600964 | 1.1 MB | 11.0504918 |
CStr | String | 224 | 13.4615391 | 6.2 MB | 63.5759711 |
CXD | Zero Sparse Reals | 126 | 7.5721152 | 133.7 KB | 1.3292189 |
CUD | Unique Reals | 5 | 0.3004808 | 19.1 KB | 0.1902767 |
C8D | 64-bit Reals | 64 | 3.8461540 | 668.9 KB | 6.6489093 |
Frame distribution summary:
size | number_of_rows | number_of_chunks_per_column | number_of_chunks | |
127.0.0.1:54321 | 9.8 MB | 42538.0 | 32.0 | 1664.0 |
mean | 9.8 MB | 42538.0 | 32.0 | 1664.0 |
min | 9.8 MB | 42538.0 | 32.0 | 1664.0 |
max | 9.8 MB | 42538.0 | 32.0 | 1664.0 |
stddev | 0 B | 0.0 | 0.0 | 0.0 |
total | 9.8 MB | 42538.0 | 32.0 | 1664.0 |
id | member_id | loan_amnt | funded_amnt | funded_amnt_inv | term | int_rate | installment | grade | sub_grade | emp_title | emp_length | home_ownership | annual_inc | verification_status | issue_d | loan_status | pymnt_plan | url | desc | purpose | title | zip_code | addr_state | dti | delinq_2yrs | earliest_cr_line | inq_last_6mths | mths_since_last_delinq | mths_since_last_record | open_acc | pub_rec | revol_bal | revol_util | total_acc | initial_list_status | out_prncp | out_prncp_inv | total_pymnt | total_pymnt_inv | total_rec_prncp | total_rec_int | total_rec_late_fee | recoveries | collection_recovery_fee | last_pymnt_d | last_pymnt_amnt | next_pymnt_d | last_credit_pull_d | collections_12_mths_ex_med | mths_since_last_major_derog | policy_code | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
type | int | int | int | int | real | enum | string | real | enum | enum | enum | string | enum | real | string | string | enum | enum | string | enum | enum | enum | enum | enum | real | int | string | int | enum | enum | int | int | int | string | int | enum | real | real | real | real | real | real | real | real | real | enum | real | enum | enum | int | enum | int |
mins | 54734.0 | 70473.0 | 500.0 | 500.0 | 0.0 | 0.0 | NaN | 15.67 | 0.0 | 0.0 | 0.0 | NaN | 0.0 | 1896.0 | NaN | NaN | 0.0 | 0.0 | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | NaN | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
mean | 664579.85231 | 825702.55117 | 11089.7225814 | 10821.5857529 | 10139.8306012 | 0.258634066063 | NaN | 322.625618667 | NaN | NaN | NaN | NaN | NaN | 69136.5564203 | NaN | NaN | NaN | 0.00014106030328 | NaN | NaN | NaN | NaN | NaN | NaN | 13.3730431409 | 0.152449066014 | NaN | 1.08142379899 | NaN | NaN | 9.34395144215 | 0.0581564955536 | 14297.8609145 | NaN | 22.1244059662 | 0.0 | 329.411123545 | 327.08996544 | 11651.753137 | 10948.5801657 | 9368.08058187 | 2209.04911203 | 1.45873268638 | 73.164719007 | 9.07681827201 | NaN | 2552.60725685 | NaN | NaN | 0.0 | 0.0 | 1.0 |
maxs | 1077501.0 | 1314167.0 | 35000.0 | 35000.0 | 35000.0 | 1.0 | NaN | 1305.19 | 6.0 | 34.0 | 30660.0 | NaN | 4.0 | 6000000.0 | NaN | NaN | 10.0 | 1.0 | NaN | 28964.0 | 13.0 | 21267.0 | 836.0 | 49.0 | 29.99 | 13.0 | NaN | 33.0 | 95.0 | 113.0 | 47.0 | 5.0 | 1207359.0 | NaN | 90.0 | 0.0 | 17749.51 | 17749.51 | 56809.0516288 | 56475.05 | 35000.03 | 21809.05 | 208.819529958 | 29623.35 | 7002.19 | 91.0 | 36115.2 | 93.0 | 97.0 | 0.0 | 0.0 | 1.0 |
sigma | 219302.219319 | 279540.905635 | 7410.93839055 | 7146.91467501 | 7131.68644868 | 0.437889248459 | NaN | 208.928069 | NaN | NaN | NaN | NaN | NaN | 64096.3497189 | NaN | NaN | NaN | 0.0118761829396 | NaN | NaN | NaN | NaN | NaN | NaN | 6.72631490173 | 0.512406485017 | NaN | 1.5274548353 | NaN | NaN | 4.49627387569 | 0.245713140651 | 22018.4410097 | NaN | 11.5928113373 | 0.0 | 1417.54568613 | 1409.61373349 | 8555.46989933 | 8486.98039403 | 6777.11981861 | 2502.52824092 | 7.57275617593 | 612.26018535 | 149.086823026 | NaN | 4380.38476617 | NaN | NaN | 0.0 | 0.0 | 0.0 |
zeros | 0 | 0 | 0 | 0 | 233 | 31534 | 0 | 0 | 10183 | 1142 | 2618 | 0 | 18959 | 0 | 0 | 0 | 5435 | 42529 | 0 | 13292 | 1615 | 10 | 1 | 86 | 206 | 37771 | 0 | 19657 | 26926 | 38884 | 0 | 40130 | 1119 | 0 | 0 | 42535 | 38972 | 38972 | 26 | 304 | 86 | 83 | 40214 | 38357 | 39033 | 83 | 95 | 36278 | 4 | 42390 | 42535 | 0 |
missing | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 7 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 32 | 3 | 32 | 3 | 3 | 32 | 32 | 3 | 3 | 32 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 148 | 3 | 3 |
0 | 1077501.0 | 1296599.0 | 5000.0 | 5000.0 | 4975.0 | 36 months | 10.65% | 162.87 | B | B2 | 10+ years | RENT | 24000.0 | VERIFIED - income | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1077501 | Borrower added on 12/22/11 > I need to upgrade my business technologies. | credit_card | Computer | 860xx | AZ | 27.65 | 0.0 | Jan-1985 | 1.0 | 3.0 | 0.0 | 13648.0 | 83.7% | 9.0 | f | 0.0 | 0.0 | 5861.07141425 | 5831.78 | 5000.0 | 861.07 | 0.0 | 0.0 | 0.0 | Jan-2015 | 171.62 | Jan-2015 | 0.0 | 1.0 | |||||
1 | 1077430.0 | 1314167.0 | 2500.0 | 2500.0 | 2500.0 | 60 months | 15.27% | 59.83 | C | C4 | Ryder | < 1 year | RENT | 30000.0 | VERIFIED - income source | Dec-2011 | Charged Off | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1077430 | Borrower added on 12/22/11 > I plan to use this money to finance the motorcycle i am looking at. I plan to have it paid off as soon as possible/when i sell my old bike. I only need this money because the deal im looking at is to good to pass up. Borrower added on 12/22/11 > I plan to use this money to finance the motorcycle i am looking at. I plan to have it paid off as soon as possible/when i sell my old bike.I only need this money because the deal im looking at is to good to pass up. I have finished college with an associates degree in business and its takingmeplaces | car | bike | 309xx | GA | 1.0 | 0.0 | Apr-1999 | 5.0 | 3.0 | 0.0 | 1687.0 | 9.4% | 4.0 | f | 0.0 | 0.0 | 1008.71 | 1008.71 | 456.46 | 435.17 | 0.0 | 117.08 | 1.11 | Apr-2013 | 119.66 | Sep-2013 | 0.0 | 1.0 | ||||
2 | 1077175.0 | 1313524.0 | 2400.0 | 2400.0 | 2400.0 | 36 months | 15.96% | 84.33 | C | C5 | 10+ years | RENT | 12252.0 | not verified | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1077175 | small_business | real estate business | 606xx | IL | 8.72 | 0.0 | Nov-2001 | 2.0 | 2.0 | 0.0 | 2956.0 | 98.5% | 10.0 | f | 0.0 | 0.0 | 3003.65364445 | 3003.65 | 2400.0 | 603.65 | 0.0 | 0.0 | 0.0 | Jun-2014 | 649.91 | Jun-2015 | 0.0 | 1.0 | ||||||
3 | 1076863.0 | 1277178.0 | 10000.0 | 10000.0 | 10000.0 | 36 months | 13.49% | 339.31 | C | C1 | AIR RESOURCES BOARD | 10+ years | RENT | 49200.0 | VERIFIED - income source | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1076863 | Borrower added on 12/21/11 > to pay for property tax (borrow from friend, need to pay back) & central A/C need to be replace. I'm very sorry to let my loan expired last time. | other | personel | 917xx | CA | 20.0 | 0.0 | Feb-1996 | 1.0 | 35 | 10.0 | 0.0 | 5598.0 | 21% | 37.0 | f | 0.0 | 0.0 | 12226.3022123 | 12226.3 | 10000.0 | 2209.33 | 16.97 | 0.0 | 0.0 | Jan-2015 | 357.48 | Jan-2015 | 0.0 | 1.0 | |||
4 | 1075358.0 | 1311748.0 | 3000.0 | 3000.0 | 3000.0 | 60 months | 12.69% | 67.79 | B | B5 | University Medical Group | 1 year | RENT | 80000.0 | VERIFIED - income source | Dec-2011 | Current | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1075358 | Borrower added on 12/21/11 > I plan on combining three large interest bills together and freeing up some extra each month to pay toward other bills. I've always been a good payor but have found myself needing to make adjustments to my budget due to a medical scare. My job is very stable, I love it. | other | Personal | 972xx | OR | 17.94 | 0.0 | Jan-1996 | 0.0 | 38 | 15.0 | 0.0 | 27783.0 | 53.9% | 38.0 | f | 1168.04 | 1168.04 | 2767.64 | 2767.64 | 1831.96 | 935.68 | 0.0 | 0.0 | 0.0 | Jun-2015 | 67.79 | Jul-2015 | Jun-2015 | 0.0 | 1.0 | ||
5 | 1075269.0 | 1311441.0 | 5000.0 | 5000.0 | 5000.0 | 36 months | 7.90% | 156.46 | A | A4 | Veolia Transportaton | 3 years | RENT | 36000.0 | VERIFIED - income source | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1075269 | wedding | My wedding loan I promise to pay back | 852xx | AZ | 11.2 | 0.0 | Nov-2004 | 3.0 | 9.0 | 0.0 | 7963.0 | 28.3% | 12.0 | f | 0.0 | 0.0 | 5631.37775318 | 5631.38 | 5000.0 | 631.38 | 0.0 | 0.0 | 0.0 | Jan-2015 | 161.03 | Jun-2015 | 0.0 | 1.0 | |||||
6 | 1069639.0 | 1304742.0 | 7000.0 | 7000.0 | 7000.0 | 60 months | 15.96% | 170.08 | C | C5 | Southern Star Photography | 8 years | RENT | 47004.0 | not verified | Dec-2011 | Current | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1069639 | Borrower added on 12/18/11 > I am planning on using the funds to pay off two retail credit cards with 24.99% interest rates, as well as a major bank credit card with a 18.99% rate. I pay all my bills on time, looking for a lower combined payment and lower monthly payment. | debt_consolidation | Loan | 280xx | NC | 23.51 | 0.0 | Jul-2005 | 1.0 | 7.0 | 0.0 | 17726.0 | 85.6% | 11.0 | f | 2853.21 | 2853.21 | 6946.28 | 6946.28 | 4146.8 | 2799.48 | 0.0 | 0.0 | 0.0 | Jun-2015 | 170.08 | Aug-2015 | Jun-2015 | 0.0 | 1.0 | |||
7 | 1072053.0 | 1288686.0 | 3000.0 | 3000.0 | 3000.0 | 36 months | 18.64% | 109.43 | E | E1 | MKC Accounting | 9 years | RENT | 48000.0 | VERIFIED - income source | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1072053 | Borrower added on 12/16/11 > Downpayment for a car. | car | Car Downpayment | 900xx | CA | 5.35 | 0.0 | Jan-2007 | 2.0 | 4.0 | 0.0 | 8221.0 | 87.5% | 4.0 | f | 0.0 | 0.0 | 3938.14433376 | 3938.14 | 3000.0 | 938.14 | 0.0 | 0.0 | 0.0 | Jan-2015 | 111.34 | Dec-2014 | 0.0 | 1.0 | ||||
8 | 1071795.0 | 1306957.0 | 5600.0 | 5600.0 | 5600.0 | 60 months | 21.28% | 152.39 | F | F2 | 4 years | OWN | 40000.0 | VERIFIED - income source | Dec-2011 | Charged Off | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1071795 | Borrower added on 12/21/11 > I own a small home-based judgment collection business. I have 5 years experience collecting debts. I am now going from a home office to a small office. I also plan to buy a small debt portfolio (eg. $10K for $1M of debt) My score is not A+ because I own my home and have no mortgage. | small_business | Expand Business & Buy Debt Portfolio | 958xx | CA | 5.55 | 0.0 | Apr-2004 | 2.0 | 11.0 | 0.0 | 5210.0 | 32.6% | 13.0 | f | 0.0 | 0.0 | 646.02 | 646.02 | 162.02 | 294.94 | 0.0 | 189.06 | 2.09 | Apr-2012 | 152.39 | Aug-2012 | 0.0 | 1.0 | |||||
9 | 1071570.0 | 1306721.0 | 5375.0 | 5375.0 | 5350.0 | 60 months | 12.69% | 121.45 | B | B5 | Starbucks | < 1 year | RENT | 15000.0 | VERIFIED - income | Dec-2011 | Charged Off | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1071570 | Borrower added on 12/16/11 > I'm trying to build up my credit history. I live with my brother and have no car payment or credit cards. I am in community college and work full time. Im going to use the money to make some repairs around the house and get some maintenance done on my car. Borrower added on 12/20/11 > $1000 down only $4375 to go. Thanks to everyone that invested so far, looking forward to surprising my brother with the fixes around the house. | other | Building my credit history. | 774xx | TX | 18.08 | 0.0 | Sep-2004 | 0.0 | 2.0 | 0.0 | 9279.0 | 36.5% | 3.0 | f | 0.0 | 0.0 | 1476.19 | 1469.34 | 673.48 | 533.42 | 0.0 | 269.29 | 2.52 | Nov-2012 | 121.45 | Mar-2013 | 0.0 | 1.0 |
# Task 2: Look at the levels in the response column, "loan_status"
# Hint: Use .table() function on the response column
loan_status | Count |
---|---|
Charged Off | 5435 |
Current | 3351 |
Default | 7 |
Does not meet the credit policy. Status:Charged Off | 761 |
Does not meet the credit policy. Status:Current | 53 |
Does not meet the credit policy. Status:Fully Paid | 1933 |
Does not meet the credit policy. Status:In Grace Period | 2 |
Fully Paid | 30843 |
In Grace Period | 60 |
Late (16-30 days) | 16 |
# Task 3: Drop all loans that are still in progress and therefore cannot be deemed good/bad loans
# Hint: "Current", "In Grace Period", "Late (16-30 days)", "Late (31-120 days)" are ongoing loans
# Display the first rows of the frame to inspect it.
data.show()
id | member_id | loan_amnt | funded_amnt | funded_amnt_inv | term | int_rate | installment | grade | sub_grade | emp_title | emp_length | home_ownership | annual_inc | verification_status | issue_d | loan_status | pymnt_plan | url | desc | purpose | title | zip_code | addr_state | dti | delinq_2yrs | earliest_cr_line | inq_last_6mths | mths_since_last_delinq | mths_since_last_record | open_acc | pub_rec | revol_bal | revol_util | total_acc | initial_list_status | out_prncp | out_prncp_inv | total_pymnt | total_pymnt_inv | total_rec_prncp | total_rec_int | total_rec_late_fee | recoveries | collection_recovery_fee | last_pymnt_d | last_pymnt_amnt | next_pymnt_d | last_credit_pull_d | collections_12_mths_ex_med | mths_since_last_major_derog | policy_code |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1.0775e+06 | 1.2966e+06 | 5000 | 5000 | 4975 | 36 months | 10.65% | 162.87 | B | B2 | 10+ years | RENT | 24000 | VERIFIED - income | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1077501 | Borrower added on 12/22/11 > I need to upgrade my business technologies. | credit_card | Computer | 860xx | AZ | 27.65 | 0 | Jan-1985 | 1 | 3 | 0 | 13648 | 83.7% | 9 | f | 0 | 0 | 5861.07 | 5831.78 | 5000 | 861.07 | 0 | 0 | 0 | Jan-2015 | 171.62 | Jan-2015 | 0 | 1 | |||||
1.07743e+06 | 1.31417e+06 | 2500 | 2500 | 2500 | 60 months | 15.27% | 59.83 | C | C4 | Ryder | < 1 year | RENT | 30000 | VERIFIED - income source | Dec-2011 | Charged Off | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1077430 | Borrower added on 12/22/11 > I plan to use this money to finance the motorcycle i am looking at. I plan to have it paid off as soon as possible/when i sell my old bike. I only need this money because the deal im looking at is to good to pass up. Borrower added on 12/22/11 > I plan to use this money to finance the motorcycle i am looking at. I plan to have it paid off as soon as possible/when i sell my old bike.I only need this money because the deal im looking at is to good to pass up. I have finished college with an associates degree in business and its takingmeplaces | car | bike | 309xx | GA | 1 | 0 | Apr-1999 | 5 | 3 | 0 | 1687 | 9.4% | 4 | f | 0 | 0 | 1008.71 | 1008.71 | 456.46 | 435.17 | 0 | 117.08 | 1.11 | Apr-2013 | 119.66 | Sep-2013 | 0 | 1 | ||||
1.07718e+06 | 1.31352e+06 | 2400 | 2400 | 2400 | 36 months | 15.96% | 84.33 | C | C5 | 10+ years | RENT | 12252 | not verified | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1077175 | small_business | real estate business | 606xx | IL | 8.72 | 0 | Nov-2001 | 2 | 2 | 0 | 2956 | 98.5% | 10 | f | 0 | 0 | 3003.65 | 3003.65 | 2400 | 603.65 | 0 | 0 | 0 | Jun-2014 | 649.91 | Jun-2015 | 0 | 1 | ||||||
1.07686e+06 | 1.27718e+06 | 10000 | 10000 | 10000 | 36 months | 13.49% | 339.31 | C | C1 | AIR RESOURCES BOARD | 10+ years | RENT | 49200 | VERIFIED - income source | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1076863 | Borrower added on 12/21/11 > to pay for property tax (borrow from friend, need to pay back) & central A/C need to be replace. I'm very sorry to let my loan expired last time. | other | personel | 917xx | CA | 20 | 0 | Feb-1996 | 1 | 35 | 10 | 0 | 5598 | 21% | 37 | f | 0 | 0 | 12226.3 | 12226.3 | 10000 | 2209.33 | 16.97 | 0 | 0 | Jan-2015 | 357.48 | Jan-2015 | 0 | 1 | |||
1.07527e+06 | 1.31144e+06 | 5000 | 5000 | 5000 | 36 months | 7.90% | 156.46 | A | A4 | Veolia Transportaton | 3 years | RENT | 36000 | VERIFIED - income source | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1075269 | wedding | My wedding loan I promise to pay back | 852xx | AZ | 11.2 | 0 | Nov-2004 | 3 | 9 | 0 | 7963 | 28.3% | 12 | f | 0 | 0 | 5631.38 | 5631.38 | 5000 | 631.38 | 0 | 0 | 0 | Jan-2015 | 161.03 | Jun-2015 | 0 | 1 | |||||
1.07205e+06 | 1.28869e+06 | 3000 | 3000 | 3000 | 36 months | 18.64% | 109.43 | E | E1 | MKC Accounting | 9 years | RENT | 48000 | VERIFIED - income source | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1072053 | Borrower added on 12/16/11 > Downpayment for a car. | car | Car Downpayment | 900xx | CA | 5.35 | 0 | Jan-2007 | 2 | 4 | 0 | 8221 | 87.5% | 4 | f | 0 | 0 | 3938.14 | 3938.14 | 3000 | 938.14 | 0 | 0 | 0 | Jan-2015 | 111.34 | Dec-2014 | 0 | 1 | ||||
1.0718e+06 | 1.30696e+06 | 5600 | 5600 | 5600 | 60 months | 21.28% | 152.39 | F | F2 | 4 years | OWN | 40000 | VERIFIED - income source | Dec-2011 | Charged Off | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1071795 | Borrower added on 12/21/11 > I own a small home-based judgment collection business. I have 5 years experience collecting debts. I am now going from a home office to a small office. I also plan to buy a small debt portfolio (eg. $10K for $1M of debt) My score is not A+ because I own my home and have no mortgage. | small_business | Expand Business & Buy Debt Portfolio | 958xx | CA | 5.55 | 0 | Apr-2004 | 2 | 11 | 0 | 5210 | 32.6% | 13 | f | 0 | 0 | 646.02 | 646.02 | 162.02 | 294.94 | 0 | 189.06 | 2.09 | Apr-2012 | 152.39 | Aug-2012 | 0 | 1 | |||||
1.07157e+06 | 1.30672e+06 | 5375 | 5375 | 5350 | 60 months | 12.69% | 121.45 | B | B5 | Starbucks | < 1 year | RENT | 15000 | VERIFIED - income | Dec-2011 | Charged Off | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1071570 | Borrower added on 12/16/11 > I'm trying to build up my credit history. I live with my brother and have no car payment or credit cards. I am in community college and work full time. Im going to use the money to make some repairs around the house and get some maintenance done on my car. Borrower added on 12/20/11 > $1000 down only $4375 to go. Thanks to everyone that invested so far, looking forward to surprising my brother with the fixes around the house. | other | Building my credit history. | 774xx | TX | 18.08 | 0 | Sep-2004 | 0 | 2 | 0 | 9279 | 36.5% | 3 | f | 0 | 0 | 1476.19 | 1469.34 | 673.48 | 533.42 | 0 | 269.29 | 2.52 | Nov-2012 | 121.45 | Mar-2013 | 0 | 1 | ||||
1.07008e+06 | 1.3052e+06 | 6500 | 6500 | 6500 | 60 months | 14.65% | 153.45 | C | C3 | Southwest Rural metro | 5 years | OWN | 72000 | not verified | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1070078 | Borrower added on 12/15/11 > I had recived a loan from Citi Financial about a year ago, I was paying 29.99 intrest, so the refinance is to cut that rate since cleaning up my credit I have been paying everything on time as shown on my credit report | debt_consolidation | High intrest Consolidation | 853xx | AZ | 16.12 | 0 | Jan-1998 | 2 | 14 | 0 | 4032 | 20.6% | 23 | f | 0 | 0 | 7677.52 | 7677.52 | 6500 | 1177.52 | 0 | 0 | 0 | Jun-2013 | 1655.54 | Jul-2013 | 0 | 1 | ||||
1.06991e+06 | 1.30501e+06 | 12000 | 12000 | 12000 | 36 months | 12.69% | 402.54 | B | B5 | UCLA | 10+ years | OWN | 75000 | VERIFIED - income source | Dec-2011 | Fully Paid | n | https://www.lendingclub.com/browse/loanDetail.action?loan_id=1069908 | debt_consolidation | Consolidation | 913xx | CA | 10.78 | 0 | Oct-1989 | 0 | 12 | 0 | 23336 | 67.1% | 34 | f | 0 | 0 | 13943.1 | 13943.1 | 12000 | 1943.08 | 0 | 0 | 0 | Sep-2013 | 6315.3 | Aug-2013 | 0 | 1 |
# Task 4: Bin the response variable into good/bad loans only; use your best judgment for what qualifies as a good/bad loan
# Create a new column called "bad_loan" which should be a binary variable
# Hint: You can turn the bad_loan column into a factor using .asfactor()
# Task 5: String munging to clean string columns before converting to numeric
# Hint: Columns that need munging include "int_rate", "revol_util", "emp_length"
#### Example for int_rate using gsub, trim, asnumeric ####
# Clean int_rate in one chained expression; the steps run in the same order
# as the method calls appear.
data["int_rate"] = (
    data["int_rate"]
    .gsub(pattern="%", replacement="")  # strip %
    .trim()                             # trim ws
    .asnumeric()                        # change to a numeric
)
data["int_rate"].show()
int_rate |
---|
10.65 |
15.27 |
15.96 |
13.49 |
7.9 |
18.64 |
21.28 |
12.69 |
14.65 |
12.69 |
# Now try for revol_util yourself
revol_util |
---|
83.7 |
9.4 |
98.5 |
21 |
28.3 |
87.5 |
32.6 |
36.5 |
20.6 |
67.1 |
# Now we're going to clean up emp_length
# Use gsub to remove " year" and " years", and also translate "n/a" to ""
data["emp_length"] = (
    data["emp_length"]
    # Remove everything from the first letter onward (plus any spaces before
    # it), or the whole value when it is "n/a"
    .gsub(pattern="([ ]*+[a-zA-Z].*)|(n/a)", replacement="")
    # Use trim to remove any trailing spaces
    .trim()
)
# Use sub to convert "< 1" to 0.5 and "10+" to 10 (the expected output below shows 0.5 for "< 1 year")
# Hint: Be mindful of spaces between characters
emp_length |
---|
10 |
0.5 |
10 |
10 |
3 |
9 |
4 |
0.5 |
5 |
10 |
# Task 6: Extract month and year from earliest_cr_line and make two new columns called
# earliest_cr_month and earliest_cr_year
earliest_cr_year |
---|
1985 |
1999 |
2001 |
1996 |
2004 |
2007 |
2004 |
2004 |
1998 |
1989 |
# Task 7: Extract month and year from issue_d and make two new columns called issue_d_month and issue_d_year
issue_d_year |
---|
2011 |
2011 |
2011 |
2011 |
2011 |
2011 |
2011 |
2011 |
2011 |
2011 |
# Task 8: Create new column called credit_length
# Hint: Do this by subtracting the earliest_cr_year from the issue_d_year
credit_length |
---|
26 |
12 |
10 |
15 |
7 |
4 |
7 |
7 |
13 |
22 |
# Task 9: Use the sub function to create two levels from the verification_status column, i.e. "verified" and "not verified"
# Task 10: Do a test-train split (80-20)
# Task 11: Define your response and predictor variables
# Response: name of the binary target column.
y = "bad_loan"
# Predictors: list of column names; it starts out empty here.
x = []
# Task 12: Set parameters for GBM model
# Task 13: Build your model
gbm Model Build Progress: [##################################################] 100%
# Task 14: View your model results