import pandas as pd
import numpy as np
%matplotlib inline
from plotly import __version__
print(__version__)
2.2.3
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise plotly's notebook mode with plotly.js loaded from the CDN.
init_notebook_mode(connected=True)
# Pass the same setting to cufflinks so it does not re-initialise plotly in
# embedded mode: inlining the whole plotly.js bundle into the cell output
# can trip Jupyter's IOPub data-rate limit.
cf.go_offline(connected=True)
IOPub data rate exceeded. The notebook server will temporarily stop sending output to the client in order to avoid crashing it. To change this limit, set the config variable `--NotebookApp.iopub_data_rate_limit`.
# Demo frame: 800 rows of random normal draws, one column per friend
friend_names = ['Sabrina', 'Rose', 'Lusi', 'Fiona']
samples = np.random.randn(800, len(friend_names))
df = pd.DataFrame(samples, columns=friend_names)
df.head()
Sabrina | Rose | Lusi | Fiona | |
---|---|---|---|---|
0 | -0.625979 | -1.338837 | 1.537151 | 1.333365 |
1 | -0.017143 | 0.043123 | -0.950405 | -0.035388 |
2 | -1.332343 | 0.067920 | 0.919870 | 1.612951 |
3 | 0.409426 | -0.008151 | -0.636676 | -0.276871 |
4 | 0.434980 | -0.642573 | -0.093593 | -1.935671 |
%matplotlib inline
# Interactive cufflinks charts: start with the default plot of every column
df.iplot()
# ...then a marker-only scatter of Rose (y) against Sabrina (x)
df.iplot(
    kind='scatter',
    mode='markers',
    x='Sabrina',
    y='Rose',
    size=20,
    title='Friendship Scatter Plot',
)
Two of my besties randomly scattered.
# Histogram for all variables in the dataset (50 bins)
df.iplot(kind='hist', title='Friendship Histogram', bins=50)
# Bar chart of each column's total
df.sum().iplot(kind='bar', title='Cumulative Friendship')
# Box plot of the friendship columns
df.iplot(title='Box Plot of Friendship', kind='box')

# Scatter matrix of the frame's columns
df.scatter_matrix()

# Spread-type visualization for the Rose/Sabrina pair
df.loc[:, ['Rose', 'Sabrina']].iplot(kind='spread')
# A small hand-written frame: one (partner, score) record per row
df2 = pd.DataFrame(
    [('Rose', 200), ('Lexie', 120), ('Fiona', 150)],
    columns=['Partner', 'Scores'],
)
df2
Partner | Scores | |
---|---|---|
0 | Rose | 200 |
1 | Lexie | 120 |
2 | Fiona | 150 |
# Bar chart of each partner's score
df2.iplot(
    kind='bar',
    x='Partner',
    y='Scores',
    title='Friendship Score',
)
Now it is apparent that Rose is the boss! Love Fiona -- the best cat friend ever! Lexie is actually just imaginary.
# Ten rows of np.random.rand samples, one column per Disney character
disney_cols = ['Lilo', 'Stitch', 'Moana', 'Maui']
df3 = pd.DataFrame(np.random.rand(10, len(disney_cols)), columns=disney_cols)
print(df3)
# Horizontal bar chart with the columns stacked
df3.iplot(kind='barh', barmode='stack')
Lilo Stitch Moana Maui 0 0.638046 0.409626 0.478375 0.436112 1 0.008412 0.275196 0.317967 0.345123 2 0.057729 0.720178 0.011990 0.443910 3 0.560254 0.990900 0.844228 0.891118 4 0.259466 0.272165 0.809376 0.917269 5 0.290856 0.739409 0.007345 0.448449 6 0.304495 0.026588 0.807316 0.204141 7 0.068016 0.935381 0.830328 0.084937 8 0.042957 0.934351 0.911425 0.652651 9 0.643267 0.686456 0.671520 0.575985
# Filled area chart of the same frame
df3.iplot(fill=True, kind='area')

# 3D surface plot
df3.iplot(
    kind='surface',
    title='Disney Battle',
    colorscale='Set1',
)
# Load the mortality-rate dataset from disk and preview its first five rows
import pandas as pd
study = pd.read_csv('study1.csv')
study.head()
Country | underFiveMortality | totalFertilityRate | teenFertility | GDPPerCapita | govtExOnHealthPerCapita | agriPercentGDP | womenMeanYearsInSchool | improvedDrinkingWaterSourcesInPercentage | improvedSanitationFacilitiesInPercentage | lifeExpectancy | totalPopulation | DTP3ImmunizedInPercentage | contraceptivePrevalenceInPercentage | CO2 | pneumoniaDeathsInNewborns | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Afghanistan | 105.0 | 5.66 | 107.0 | 1637 | 4.390408 | 29.915323 | 0.8 | 50.0 | 37.0 | 53.6 | 31411743 | 66.0 | NaN | 0.08 | 6.288837 |
1 | Albania | 16.6 | 1.74 | 11.2 | 9374 | 94.023613 | 20.160029 | 10.7 | 95.0 | 94.0 | 77.2 | 3204284 | 99.0 | NaN | 0.43 | 0.238741 |
2 | Algeria | 27.4 | 2.82 | 7.0 | 12494 | 138.840923 | 6.915571 | 7.1 | 83.0 | 95.0 | 76.0 | 35468208 | 95.0 | NaN | 0.90 | 3.021863 |
3 | Argentina | 14.6 | 2.22 | 68.2 | 15765 | 405.058875 | 10.003471 | 11.4 | NaN | NaN | 75.8 | 40412376 | 94.0 | NaN | 1.24 | 0.155241 |
4 | Armenia | 18.0 | 1.55 | 28.3 | 6508 | 54.238760 | 19.577842 | 11.4 | 98.0 | 90.0 | 73.0 | 3092072 | 94.0 | 54.9 | 0.39 | 1.401095 |
# Scatter plots: GDP per capita (x) against life expectancy (y)
study.iplot(
    kind='scatter',
    mode='markers',
    x='GDPPerCapita',
    y='lifeExpectancy',
    title='GDPpc VS Life Expectancy 2010',
    xTitle='GDP per Capita',
    yTitle='Life Expectancy',
)
# Same axes as a bubble chart: bubbles sized by population and labelled
# with the country name
study.iplot(
    kind='bubble',
    x='GDPPerCapita',
    y='lifeExpectancy',
    size='totalPopulation',
    text='Country',
    categories='lifeExpectancy',
    colorscale='Set1',
    showlegend=False,
    title='GDPpc VS Life Expectancy 2010',
    xTitle='GDP per Capita',
    yTitle='Life Expectancy',
)
Interesting to see that life expectancy improves faster when GDP per Capita is lower (steeper positive slope). When GDPpc hits a certain point like China and India, improvement of life expectancy slows down. The colors of the bubbles are based on life expectancy.
# Bubble chart: total fertility rate (x) against under-five mortality (y),
# bubbles sized by population
study.iplot(
    kind='bubble',
    x='totalFertilityRate',
    y='underFiveMortality',
    size='totalPopulation',
    text='Country',
    categories='lifeExpectancy',
    colorscale='Set1',
    showlegend=False,
    title='Total Fertility Rate VS Child Mortality Rate 2010',
    xTitle='totalFertilityRate',
    yTitle='underFiveMortality',
)
Interesting to see that there is a positive correlation between fertility and child mortality.
# Bubble chart: women's mean years in school (x) against under-five
# mortality (y), bubbles sized by population
study.iplot(
    kind='bubble',
    x='womenMeanYearsInSchool',
    y='underFiveMortality',
    size='totalPopulation',
    text='Country',
    categories='lifeExpectancy',
    colorscale='Set1',
    showlegend=False,
    title='Women Mean Years In School VS Child Mortality Rate 2010',
    xTitle='womenMeanYearsInSchool',
    yTitle='underFiveMortality',
)
Women who are more educated are more likely to know how to take care of their child/children.
# Bubble chart: agriculture share of GDP (x) against under-five mortality
# (y); here the bubbles are sized by GDP per capita rather than population
study.iplot(
    kind='bubble',
    x='agriPercentGDP',
    y='underFiveMortality',
    size='GDPPerCapita',
    text='Country',
    categories='lifeExpectancy',
    colorscale='Set1',
    showlegend=False,
    title='Agriculture Percent GDP VS Child Mortality Rate 2010',
    xTitle='agriPercentGDP',
    yTitle='underFiveMortality',
)
Note that in this graph, the bigger the bubble, the higher the GDPpc. The big-bubble countries have a lower percentage of agriculture in their GDP and tend to have a lower child mortality rate. On the other hand, the smaller-bubble countries with a higher percentage of agriculture tend to have a higher child mortality rate.
# Heatmap to study correlations
# `study` still carries the string column 'Country'. Older pandas silently
# dropped non-numeric columns in corr(), but pandas >= 2.0 raises on them,
# so restrict to numeric columns explicitly (same result on old versions).
numeric_study = study.select_dtypes(include=['number'])
numeric_study.corr().iplot(kind='heatmap', colorscale='spectral')