#!/usr/bin/env python
# coding: utf-8

# #### New to Plotly?
# Plotly's Python library is free and open source! [Get started](https://plotly.com/python/getting-started/) by downloading the client and [reading the primer](https://plotly.com/python/getting-started/).
# You can set up Plotly to work in [online](https://plotly.com/python/getting-started/#initialization-for-online-plotting) or [offline](https://plotly.com/python/getting-started/#initialization-for-offline-plotting) mode, or in [Jupyter notebooks](https://plotly.com/python/getting-started/#start-plotting-online).
# We also have a quick-reference [cheatsheet](https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf) (new!) to help you get started!

# #### Imports
# The tutorial below imports [Numpy](http://www.numpy.org/), [Pandas](https://plotly.com/pandas/intro-to-pandas-tutorial/), and [SciPy](https://www.scipy.org/).

# In[1]:


import plotly.plotly as py
import plotly.graph_objs as go
from plotly.tools import FigureFactory as FF

import numpy as np
import pandas as pd
import scipy


# #### Import Data
# We will import a dataset to perform our discrete frequency analysis on.
# We will look at the consumption of alcohol by country in 2010.

# In[2]:


data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2010_alcohol_consumption_by_country.csv')

# Preview only the first ten rows as a table.
df = data[0:10]
table = FF.create_table(df)
py.iplot(table, filename='alcohol-data-sample')


# #### Probability Distribution
# We can produce a histogram plot of the data with the y-axis representing
# the probability distribution of the data.

# In[3]:


x = data['alcohol'].values.tolist()

# Every histogram below shares the same binning: fixed-width bins of 0.25
# spanning the observed range of the data. Build it once and reuse it.
xbins = dict(start=np.min(x), size=0.25, end=np.max(x))

trace = go.Histogram(
    x=x,
    histnorm='probability',
    xbins=xbins,
    marker=dict(color='rgb(25, 25, 100)'),
)
layout = go.Layout(title="Histogram with Probability Distribution")

# A plain list of traces is used instead of the deprecated go.Data wrapper.
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='histogram-prob-dist')


# #### Frequency Counts

# In[4]:


# No histnorm: the y-axis shows the raw number of observations per bin.
trace = go.Histogram(
    x=x,
    xbins=xbins,
    marker=dict(color='rgb(25, 25, 100)'),
)
layout = go.Layout(title="Histogram with Frequency Count")

fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='histogram-discrete-freq-count')


# #### Percentage

# In[5]:


trace = go.Histogram(
    x=x,
    histnorm='percent',
    xbins=xbins,
    marker=dict(color='rgb(50, 50, 125)'),
)
# The title now matches the 'percent' normalisation; it previously repeated
# the frequency-count title from the cell above.
layout = go.Layout(title="Histogram with Percentage")

fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='histogram-percentage')


# #### Cumulative Distribution Function
# We can also plot the empirical cumulative distribution function, or `CDF`,
# of our dataset as a scatter plot: sort the observations and plot each value
# against the fraction of observations less than or equal to it.

# In[6]:


# Empirical CDF: the i-th smallest value (1-based) has cumulative
# probability i / n. float() keeps the division exact on Python 2 as well.
sorted_x = np.sort(x)
cum_prob = np.arange(1, len(sorted_x) + 1) / float(len(sorted_x))

trace = go.Scatter(
    x=sorted_x,
    y=cum_prob,
    marker=dict(color='rgb(150, 25, 120)'),
)
layout = go.Layout(title="Cumulative Distribution Function")

fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='cdf-dataset')


# In[1]:


# Documentation-publishing cell: installs the `publisher` helper and
# registers this notebook on the docs site. Requires an IPython session
# (get_ipython) and network access.
from IPython.display import display, HTML

display(HTML(''))
display(HTML(''))

get_ipython().system(' pip install git+https://github.com/plotly/publisher.git --upgrade')
import publisher
publisher.publish(
    'python-Discrete-Frequency.ipynb', 'python/discrete-frequency/', 'Discrete Frequency | plotly',
    'Learn how to perform discrete frequency analysis using Python.',
    title='Discrete Frequency in Python. | plotly',
    name='Discrete Frequency',
    language='python',
    page_type='example_index', has_thumbnail='false', display_as='statistics', order=3,
    ipynb= '~notebook_demo/110')


# In[ ]: