#!/usr/bin/env python
# coding: utf-8
# #### New to Plotly?
# Plotly's Python library is free and open source! [Get started](https://plotly.com/python/getting-started/) by downloading the client and [reading the primer](https://plotly.com/python/getting-started/).
# You can set up Plotly to work in [online](https://plotly.com/python/getting-started/#initialization-for-online-plotting) or [offline](https://plotly.com/python/getting-started/#initialization-for-offline-plotting) mode, or in [jupyter notebooks](https://plotly.com/python/getting-started/#start-plotting-online).
# We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started!
# #### Imports
# The tutorial below imports [NumPy](http://www.numpy.org/), [Pandas](https://plotly.com/pandas/intro-to-pandas-tutorial/), and [SciPy](https://www.scipy.org/).
# In[1]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.tools import FigureFactory as FF
import numpy as np
import pandas as pd
import scipy
# #### Import Data
# We will import a dataset to perform our discrete frequency analysis on. We will look at the consumption of alcohol by country in 2010.
# In[2]:
# Download the full 2010 alcohol-consumption-by-country table.
data = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2010_alcohol_consumption_by_country.csv'
)
# Only the first ten rows are rendered as a preview table; `data` itself
# stays complete for the analysis in the following cells.
df = data.head(10)
table = FF.create_table(df)
py.iplot(table, filename='alcohol-data-sample')
# #### Probability Distribution
# We can produce a histogram plot of the data with the y-axis representing the probability distribution of the data.
# In[3]:
# Pull the alcohol-consumption column out of the DataFrame as a plain list;
# `x` is reused by the histogram cells that follow.
x = data['alcohol'].values.tolist()
# histnorm='probability' scales each bar to the fraction of observations
# that fall in its bin instead of the raw count.
trace = go.Histogram(
    x=x,
    histnorm='probability',
    # Fixed-width bins of 0.25 covering the observed range of the data.
    xbins=dict(start=np.min(x), size=0.25, end=np.max(x)),
    marker=dict(color='rgb(25, 25, 100)')
)
layout = go.Layout(title="Histogram with Probability Distribution")
# Traces are passed as a plain list: the go.Data wrapper is deprecated and a
# list of traces is the supported way to populate a figure.
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='histogram-prob-dist')
# #### Frequency Counts
# In[4]:
# No histnorm is given, so the bars show the raw number of observations per bin.
trace = go.Histogram(
    x=x,
    # Fixed-width bins of 0.25 covering the observed range of the data.
    xbins=dict(start=np.min(x), size=0.25, end=np.max(x)),
    marker=dict(color='rgb(25, 25, 100)')
)
layout = go.Layout(title="Histogram with Frequency Count")
# Traces are passed as a plain list: the go.Data wrapper is deprecated and a
# list of traces is the supported way to populate a figure.
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='histogram-discrete-freq-count')
# #### Percentage
# In[5]:
# histnorm='percent' scales each bar to the percentage of observations that
# fall in its bin.
trace = go.Histogram(
    x=x,
    histnorm='percent',
    # Fixed-width bins of 0.25 covering the observed range of the data.
    xbins=dict(start=np.min(x), size=0.25, end=np.max(x)),
    marker=dict(color='rgb(50, 50, 125)')
)
# The title now matches the percent normalisation; it previously repeated the
# "Frequency Count" title of the preceding cell.
layout = go.Layout(title="Histogram with Percentage")
# Traces are passed as a plain list: the go.Data wrapper is deprecated and a
# list of traces is the supported way to populate a figure.
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='histogram-percentage')
# #### Cumulative Distribution Function
# We can also compute the empirical cumulative distribution function, or `CDF`, of our dataset and plot it as a scatter plot. For every observed value it gives the fraction of observations that are less than or equal to that value.
# In[6]:
# Sort the observations; the value at rank k (1-based) is then paired with the
# cumulative proportion k / n, so the curve is non-decreasing and ends at 1.
# (A running sum of the raw, unsorted values is not a distribution function.)
sorted_x = np.sort(x)
n_obs = len(sorted_x)
# float() keeps this a true division on Python 2 as well as Python 3.
cdf = np.arange(1, n_obs + 1) / float(n_obs)
trace = go.Scatter(
    x=sorted_x,
    y=cdf,
    marker=dict(color='rgb(150, 25, 120)')
)
layout = go.Layout(title="Cumulative Distribution Function")
# Traces are passed as a plain list: the go.Data wrapper is deprecated and a
# list of traces is the supported way to populate a figure.
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='cdf-dataset')
# In[1]:
from IPython.display import display, HTML

# Two empty HTML fragments are displayed, exactly as in the exported notebook.
display(HTML(''))
display(HTML(''))

# Install (or upgrade) the publisher package from GitHub; this has to happen
# before the import on the next line.
get_ipython().system(' pip install git+https://github.com/plotly/publisher.git --upgrade')
import publisher

# Keyword metadata handed to publisher.publish.
# NOTE(review): the meaning of each field is defined by the publisher package,
# which is not visible from this file.
publish_options = dict(
    title='Discrete Frequency in Python. | plotly',
    name='Discrete Frequency',
    language='python',
    page_type='example_index',
    has_thumbnail='false',
    display_as='statistics',
    order=3,
    ipynb='~notebook_demo/110'
)
publisher.publish(
    'python-Discrete-Frequency.ipynb',
    'python/discrete-frequency/',
    'Discrete Frequency | plotly',
    'Learn how to perform discrete frequency analysis using Python.',
    **publish_options
)
# In[ ]: