# importando a biblioteca pandas as pd
import pandas as pd
# Load the credit dataset and discard every row that has a missing value.
dataframe = pd.read_csv('Dados de Credito.csv', sep=',', encoding='utf-8').dropna()
# Display the cleaned dataframe.
dataframe
clientid | income | age | loan | default | |
---|---|---|---|---|---|
0 | 1 | 66155.925095 | 59.017015 | 8106.532131 | 0 |
1 | 2 | 34415.153966 | 48.117153 | 6564.745018 | 0 |
2 | 3 | 57317.170063 | 63.108049 | 8020.953296 | 0 |
3 | 4 | 42709.534201 | 45.751972 | 6103.642260 | 0 |
4 | 5 | 66952.688845 | 18.584336 | 8770.099235 | 1 |
... | ... | ... | ... | ... | ... |
1995 | 1996 | 59221.044874 | 48.518179 | 1926.729397 | 0 |
1996 | 1997 | 69516.127573 | 23.162104 | 3503.176156 | 0 |
1997 | 1998 | 44311.449262 | 28.017167 | 5522.786693 | 1 |
1998 | 1999 | 43756.056605 | 63.971796 | 1622.722598 | 0 |
1999 | 2000 | 69436.579552 | 56.152617 | 7378.833599 | 0 |
1997 rows × 5 columns
# importando a biblioteca matplotlib do python
import matplotlib.pyplot as plt
%matplotlib inline
# Scatter plot: income (column 1, x axis) against age (column 2, y axis).
income_values = dataframe.iloc[:, 1]
age_values = dataframe.iloc[:, 2]
plt.figure(figsize=(10, 5))
plt.title('Outliers da Idade em Função da Renda')
plt.scatter(income_values, age_values)
<matplotlib.collections.PathCollection at 0x146324b3108>
# Scatter plot: income (column 1, x axis) against loan (column 3, y axis).
# The title follows the "y as a function of x" wording used by the other
# plots: the loan is on the y axis, so it is "loan as a function of income".
plt.figure(figsize=(10, 5))
plt.title('Outliers do Empréstimo em Função da Renda')
plt.scatter(dataframe.iloc[:, 1], dataframe.iloc[:, 3])
<matplotlib.collections.PathCollection at 0x1463251d648>
# Scatter plot: age (column 2, x axis) against loan (column 3, y axis).
# Income is not plotted here, so the title names the loan (y axis) as a
# function of age (x axis) instead of mentioning income.
plt.figure(figsize=(10, 5))
plt.title('Outliers do Empréstimo em Função da Idade')
plt.scatter(dataframe.iloc[:, 2], dataframe.iloc[:, 3])
<matplotlib.collections.PathCollection at 0x14632528388>
# Overwrite the inconsistent (negative) ages with a fixed replacement value.
# NOTE(review): 40.92 is presumably the mean of the valid ages — confirm.
negative_age = dataframe['age'].lt(0)
dataframe.loc[negative_age, 'age'] = 40.92
# Display the mean of the 'loan' column (the loan amount, not the income).
dataframe['loan'].mean()
4445.487715888529
# Replace the loans treated as inconsistent (above 13400) with the mean loan.
# The mean is computed from the data instead of being pasted in as a
# truncated literal (4445.48), so it stays accurate if the input file changes.
# The right-hand side is evaluated before the assignment takes place, so it is
# the mean of the column as it stands before any value is replaced.
dataframe.loc[dataframe.loan > 13400, 'loan'] = dataframe.loan.mean()
# Scatter plot after the value replacements above:
# income (column 1, x axis) against age (column 2, y axis).
income_values = dataframe.iloc[:, 1]
age_values = dataframe.iloc[:, 2]
plt.figure(figsize=(10, 5))
plt.title('Outliers da Idade em Função da Renda')
plt.scatter(income_values, age_values)
<matplotlib.collections.PathCollection at 0x146333410c8>
# Scatter plot after the value replacements above:
# income (column 1, x axis) against loan (column 3, y axis).
# The title follows the "y as a function of x" wording used by the other
# plots: the loan is on the y axis, so it is "loan as a function of income".
plt.figure(figsize=(10, 5))
plt.title('Outliers do Empréstimo em Função da Renda')
plt.scatter(dataframe.iloc[:, 1], dataframe.iloc[:, 3])
<matplotlib.collections.PathCollection at 0x1463345dd08>
# Scatter plot after the value replacements above:
# age (column 2, x axis) against loan (column 3, y axis).
# Income is not plotted here, so the title names the loan (y axis) as a
# function of age (x axis) instead of mentioning income.
plt.figure(figsize=(10, 5))
plt.title('Outliers do Empréstimo em Função da Idade')
plt.scatter(dataframe.iloc[:, 2], dataframe.iloc[:, 3])
<matplotlib.collections.PathCollection at 0x14633423ac8>
# Load the census dataset; this rebinds 'dataframe', replacing the credit data.
census_path = 'census.csv'
dataframe = pd.read_csv(census_path, sep=',', encoding='utf-8')
# Display the dataframe.
dataframe
age | workclass | final-weight | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loos | hour-per-week | native-country | income | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 39 | State-gov | 77516 | Bachelors | 13 | Never-married | Adm-clerical | Not-in-family | White | Male | 2174 | 0 | 40 | United-States | <=50K |
1 | 50 | Self-emp-not-inc | 83311 | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 13 | United-States | <=50K |
2 | 38 | Private | 215646 | HS-grad | 9 | Divorced | Handlers-cleaners | Not-in-family | White | Male | 0 | 0 | 40 | United-States | <=50K |
3 | 53 | Private | 234721 | 11th | 7 | Married-civ-spouse | Handlers-cleaners | Husband | Black | Male | 0 | 0 | 40 | United-States | <=50K |
4 | 28 | Private | 338409 | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Wife | Black | Female | 0 | 0 | 40 | Cuba | <=50K |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
32556 | 27 | Private | 257302 | Assoc-acdm | 12 | Married-civ-spouse | Tech-support | Wife | White | Female | 0 | 0 | 38 | United-States | <=50K |
32557 | 40 | Private | 154374 | HS-grad | 9 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0 | 0 | 40 | United-States | >50K |
32558 | 58 | Private | 151910 | HS-grad | 9 | Widowed | Adm-clerical | Unmarried | White | Female | 0 | 0 | 40 | United-States | <=50K |
32559 | 22 | Private | 201490 | HS-grad | 9 | Never-married | Adm-clerical | Own-child | White | Male | 0 | 0 | 20 | United-States | <=50K |
32560 | 52 | Self-emp-inc | 287927 | HS-grad | 9 | Married-civ-spouse | Exec-managerial | Wife | White | Female | 15024 | 0 | 40 | United-States | >50K |
32561 rows × 15 columns
# Scatter plot: age (column 0, x axis) against final weight (column 2, y axis).
age_values = dataframe.iloc[:, 0]
final_weight_values = dataframe.iloc[:, 2]
plt.figure(figsize=(10, 5))
plt.title('Outliers da Relevância Individual em Função da Idade')
plt.scatter(age_values, final_weight_values)
<matplotlib.collections.PathCollection at 0x14633584588>