# run this cell to install pycaret in Google Colab
# !pip install pycaret
# If you are using a Jupyter notebook, you can install pycaret from a notebook cell or from the command line
# pip install pycaret
from pycaret.utils import version
# Show the installed pycaret version so the notebook's results can be matched to a release.
version()
1.0.0
# only run this cell if you are using google colab
# from pycaret.utils import enable_colab
# enable_colab()
from pycaret.datasets import get_data
# Load the 'kiva' dataset through pycaret's dataset helper.
data = get_data(dataset="kiva")
| | country | en | gender | loan_amount | nonpayment | sector | status |
---|---|---|---|---|---|---|---|
0 | Dominican Republic | "Banco Esperanza" is a group of 10 women looki... | F | 1225 | partner | Retail | 0 |
1 | Dominican Republic | "Caminemos Hacia Adelante" or "Walking Forward... | F | 1975 | lender | Clothing | 0 |
2 | Dominican Republic | "Creciendo Por La Union" is a group of 10 peop... | F | 2175 | partner | Clothing | 0 |
3 | Dominican Republic | "Cristo Vive" ("Christ lives" is a group of 10... | F | 1425 | partner | Clothing | 0 |
4 | Dominican Republic | "Cristo Vive" is a large group of 35 people, 2... | F | 4025 | partner | Food | 0 |
# Display the complete text of the first document (the table preview truncates it).
data["en"][0]
'"Banco Esperanza" is a group of 10 women looking to receive a small loan. Each of them has taken out a very small loan already, so this would be their second. With this loan the group is going to try and expand their small businesses and start generating more income. <P>\r\n\r\nEduviges is the group representative and leader of the group. Eduviges has a lot on the line because she has 6 children that she has to take care of. She told me that those children are the reason she wants to be successful. She wants to be able to provide a different life for them and show them that they can be successful as well. <P>\r\n\r\nEduviges has a very small business selling shoes and Avon products. She plans to expand using this loan and dreams of success. The whole group is ready for this new challenge and are on the road to bettering their lives. On behalf of Eduviges, the group, and Esperanza International--------- Thank you for your support!!!!'
# Keep only the first 1000 rows for the rest of the notebook.
data = data.iloc[:1000]
from pycaret.nlp import *
# Initialise the NLP experiment on the 'en' text column; session_id fixes the seed.
nlp1 = setup(data=data, target="en", session_id=786)
Description | Value |
---|---|
session_id | 786 |
# Documents | 1000 |
Vocab Size | 3870 |
Custom Stopwords | False |
# Train the 'lda' topic model and print its summary.
lda = create_model(model="lda")
print(lda)
LdaModel(num_terms=3870, num_topics=4, decay=0.5, chunksize=100)
# Train the 'nmf' topic model and print its summary.
nmf = create_model(model="nmf")
print(nmf)
NMF(alpha=0.0, beta_loss='frobenius', init='nndsvd', l1_ratio=0.0, max_iter=200, n_components=4, random_state=786, shuffle=False, solver='cd', tol=0.0001, verbose=0)
# Default plot with no model passed.
plot_model()
# Default plot for the trained LDA model.
plot_model(model=lda)
# t-SNE plot for each trained model.
plot_model(model=lda, plot="tsne")
plot_model(model=nmf, plot="tsne")
# Interactive widget for switching between plot types of the LDA model.
evaluate_model(model=lda)
interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Frequency Plot', 'freque…
# Attach per-topic weights and the dominant topic to every document.
results = assign_model(model=lda)
# Preview the first five labelled rows.
results.head(5)
| | country | en | gender | loan_amount | nonpayment | sector | status | Topic_0 | Topic_1 | Topic_2 | Topic_3 | Dominant_Topic | Perc_Dominant_Topic |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Dominican Republic | group woman look receive small loan take small... | F | 1225 | partner | Retail | 0 | 0.255102 | 0.114688 | 0.627853 | 0.002357 | Topic 2 | 0.63 |
1 | Dominican Republic | walk forward group entrepreneur seek second lo... | F | 1975 | lender | Clothing | 0 | 0.570880 | 0.332298 | 0.094765 | 0.002057 | Topic 0 | 0.57 |
2 | Dominican Republic | group people hope start business group look re... | F | 2175 | partner | Clothing | 0 | 0.165148 | 0.044879 | 0.786944 | 0.003029 | Topic 2 | 0.79 |
3 | Dominican Republic | live group woman look receive first loan young... | F | 1425 | partner | Clothing | 0 | 0.324298 | 0.075029 | 0.598386 | 0.002287 | Topic 2 | 0.60 |
4 | Dominican Republic | vive large group people hope take loan many se... | F | 4025 | partner | Food | 0 | 0.212560 | 0.152464 | 0.632607 | 0.002369 | Topic 2 | 0.63 |