#!/usr/bin/env python
# coding: utf-8

# Cufflinks binds [Plotly](https://plotly.com/python) directly to [pandas](http://pandas.pydata.org/) dataframes.

# In[1]:


import plotly.tools as tls
tls.embed('https://plotly.com/~cufflinks/8')


# #### Packages
# Run `! pip install cufflinks --upgrade` to install Cufflinks. In addition to [Plotly](https://plotly.com/python), [pandas](http://pandas.pydata.org/) and Cufflinks, this tutorial will also use [NumPy](http://www.numpy.org/).

# In[2]:


import plotly.plotly as py
import cufflinks as cf
import pandas as pd
import numpy as np

# Call form of print: valid on both Python 2 and Python 3.
print(cf.__version__)


# #### Dataframes
# With Plotly's Python library, you can describe figures with a DataFrame's series and index

# In[3]:


df = cf.datagen.lines()

# One trace per column: the shared index is the x axis, the column values are y.
py.iplot([{
    'x': df.index,
    'y': df[col],
    'name': col
} for col in df.columns], filename='cufflinks/simple-line')


# But with `cufflinks`, you can plot them directly

# In[5]:


df.iplot(kind='scatter', filename='cufflinks/cf-simple-line')


# Almost every chart that you make in `cufflinks` will be created with just one line of code.

# In[6]:


df = pd.DataFrame(np.random.randn(1000, 4), columns=['a', 'b', 'c', 'd'])
df.scatter_matrix(filename='cufflinks/scatter-matrix', world_readable=True)


# Charts created with `cufflinks` are synced with your online Plotly account. You'll need to [configure your credentials](https://plotly.com/python/getting-started/) to get started. `cufflinks` can also be configured to work offline in IPython notebooks with [Plotly Offline](https://plotly.com/python/offline). To get started with Plotly Offline, [download a trial library](http://purchasing.plot.ly/plotly-offline-ipython) and run `cf.go_offline()`.

# In[14]:


cf.go_online()  # switch back to online mode, where graphs are saved on your online plotly account


# By default, plotly graphs are *public*.
# Make them private by setting `world_readable` to `False`

# In[15]:


df.a.iplot(kind='histogram', world_readable=False)


# Only *you* (the creator) will be able to see this chart. To change the global default settings, use `cf.set_config_file`

# In[16]:


cf.set_config_file(offline=False, world_readable=True, theme='ggplot')


# ### Chart Types
# ##### Line Charts

# In[17]:


df = pd.DataFrame(np.random.randn(1000, 2), columns=['A', 'B']).cumsum()
df.iplot(filename='cufflinks/line-example')


# Plot one column vs another with `x` and `y` keywords

# In[18]:


df.iplot(x='A', y='B', filename='cufflinks/x-vs-y-line-example')


# ##### Bar Charts
# Download some civic data. A time series log of the 311 complaints in NYC.

# In[ ]:


df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/widgets/master/ipython-examples/311_150k.csv',
    parse_dates=True,
    index_col=1)
df.head(3)


# In[ ]:


# Keep only the 20 most frequent complaint types.
series = df['Complaint Type'].value_counts()[:20]
series.head(3)


# Plot a `series` directly

# In[18]:


series.iplot(kind='bar', yTitle='Number of Complaints', title='NYC 311 Complaints',
             filename='cufflinks/categorical-bar-chart')


# Plot a dataframe row as a bar

# In[19]:


df = pd.DataFrame(np.random.rand(10, 4), columns=['A', 'B', 'C', 'D'])
# Label-based lookup with `.loc`; the old `.ix` indexer was removed in pandas 1.0.
# The frame has the default integer index, so label 5 is also the sixth row.
row = df.loc[5]
row.iplot(kind='bar', filename='cufflinks/bar-chart-row')


# Call `iplot(kind='bar')` on a dataframe to produce a grouped bar chart

# In[20]:


df.iplot(kind='bar', filename='cufflinks/grouped-bar-chart')


# In[21]:


# NOTE(review): this stacked chart reuses the filename of the grouped chart
# above, so it presumably overwrites it in the online account - confirm intent.
df.iplot(kind='bar', barmode='stack', filename='cufflinks/grouped-bar-chart')


# Remember: plotly charts are interactive. Click on the legend entries to hide-and-show traces, click-and-drag to zoom, double-click to autoscale, shift-click to drag.
#
# ![Click on legend entries to hide and show traces](http://i.imgur.com/419Bmdy.gif)

# Make your bar charts horizontal with `kind='barh'`

# In[22]:


df.iplot(kind='barh', barmode='stack', bargap=.1, filename='cufflinks/barh')


# ##### Themes
#
# cufflinks ships with a few themes.
# View available themes with `cf.getThemes`, apply them with `cf.set_config_file`

# In[23]:


cf.getThemes()


# In[24]:


cf.set_config_file(theme='pearl')


# ##### Histograms

# In[4]:


# Three normal samples shifted to +1, 0 and -1 so the histograms are offset.
df = pd.DataFrame({'a': np.random.randn(1000) + 1,
                   'b': np.random.randn(1000),
                   'c': np.random.randn(1000) - 1})

df.iplot(kind='histogram', filename='cufflinks/basic-histogram')


# Customize your histogram with
# - `barmode` (`overlay` | `group` | `stack`)
# - `bins` (`int`)
# - `histnorm` (`'' | 'percent' | 'probability' | 'density' | 'probability density'`)
# - `histfunc` (`'count' | 'sum' | 'avg' | 'min' | 'max'`)

# In[27]:


df.iplot(kind='histogram', barmode='stack', bins=100, histnorm='probability',
         filename='cufflinks/customized-histogram')


# Like every chart type, split your traces into subplots or small-multiples with `subplots` and optionally `shape`. More on `subplots` below.

# In[28]:


df.iplot(kind='histogram', subplots=True, shape=(3, 1),
         filename='cufflinks/histogram-subplots')


# ##### Box Plots

# In[29]:


df = pd.DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E'])
df.iplot(kind='box', filename='cufflinks/box-plots')


# #### Area Charts
#
# To produce a stacked area plot, each column must be either all positive or all negative values.
#
# When input data contains `NaN`, it will be automatically filled by 0. If you want to drop or fill by different values, use `dataframe.dropna()` or `dataframe.fillna()` before calling plot.

# In[30]:


df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])


# In[31]:


df.iplot(kind='area', fill=True, filename='cufflinks/stacked-area')


# For non-stacked area charts, set `kind=scatter` with `fill=True`

# In[32]:


df.iplot(fill=True, filename='cufflinks/filled-area')


# #### Scatter Plot
#
# Set `x` and `y` as column names. If `x` isn't supplied, `df.index` will be used.
# In[5]:


import pandas as pd

# Gapminder five-year data, tab-separated.
df = pd.read_csv(
    'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt',
    sep='\t')

# Keep one slice per year of interest; both are reused by the charts below.
df2007 = df[df['year'] == 2007]
df1952 = df[df['year'] == 1952]

df2007.iplot(
    kind='scatter',
    mode='markers',
    x='gdpPercap',
    y='lifeExp',
    filename='cufflinks/simple-scatter')


# Plotting multiple column scatter plots isn't as easy with cufflinks. Here is an example with Plotly's native syntax

# In[35]:


# One marker trace per yearly slice, labelled with the year it came from.
fig = dict(
    data=[
        dict(x=frame['gdpPercap'],
             y=frame['lifeExp'],
             text=frame['country'],
             mode='markers',
             name=label)
        for label, frame in [('2007', df2007), ('1952', df1952)]
    ],
    layout=dict(
        xaxis=dict(title='GDP per Capita', type='log'),
        yaxis=dict(title="Life Expectancy"),
    ),
)

py.iplot(fig, filename='cufflinks/multiple-scatter')


# Grouping isn't as easy either. But, with Plotly's native syntax:

# In[36]:


py.iplot(
    dict(
        data=[
            dict(x=df[df['year'] == year]['gdpPercap'],
                 y=df[df['year'] == year]['lifeExp'],
                 name=year,
                 mode='markers')
            for year in [1952, 1982, 2007]
        ],
        layout=dict(
            xaxis=dict(title='GDP per Capita', type='log'),
            yaxis=dict(title="Life Expectancy"),
        ),
    ),
    filename='cufflinks/scatter-group-by')


# #### Bubble Charts
# Add `size` to create a bubble chart. Add hover text with the `text` attribute.

# In[37]:


df2007.iplot(
    kind='bubble',
    x='gdpPercap',
    y='lifeExp',
    size='pop',
    text='country',
    xTitle='GDP per Capita',
    yTitle='Life Expectancy',
    filename='cufflinks/simple-bubble-chart')


# #### Subplots
# `subplots=True` partitions columns into separate subplots. Specify rows and columns with `shape=(rows, cols)` and share axes with `shared_xaxes=True` and `shared_yaxes=True`.

# In[38]:


df = cf.datagen.lines(4)
df.iplot(
    subplots=True,
    shape=(4, 1),
    shared_xaxes=True,
    fill=True,
    filename='cufflinks/simple-subplots')


# Add subplot titles with `subplot_titles` as a list of titles or `True` to use column names.
# In[39]:


df.iplot(subplots=True, subplot_titles=True, legend=False)


# #### Scatter matrix

# In[40]:


df.scatter_matrix(filename='cufflinks/scatter-matrix-subplot', world_readable=True)


# #### Heatmaps

# In[41]:


cf.datagen.heatmap(20, 20).iplot(kind='heatmap', colorscale='spectral',
                                 filename='cufflinks/simple-heatmap')


# #### Lines and Shaded Areas
#
# Use `hline` and `vline` for horizontal and vertical lines.

# In[42]:


df = cf.datagen.lines(3, columns=['a', 'b', 'c'])


# In[43]:


df.iplot(hline=[2, 4], vline=['2015-02-10'])


# Draw shaded regions with `hspan`

# In[44]:


df.iplot(hspan=[(-1, 1), (2, 5)], filename='cufflinks/shaded-regions')


# Extra parameters can be passed in the form of dictionaries: width, fill, color, fillcolor, opacity

# In[45]:


df.iplot(vspan={'x0': '2015-02-15',
                'x1': '2015-03-15',
                'color': 'rgba(30,30,30,0.3)',
                'fill': True,
                'opacity': .4},
         filename='cufflinks/custom-regions')


# ### Customizing Figures
#
# `cufflinks` is designed for simple one-line charting with Pandas and Plotly. Not all of the Plotly chart attributes are directly assignable in the `df.iplot` call signature.
#
# To update attributes of a `cufflinks` chart that aren't available, first convert it to a figure (`asFigure=True`), then tweak it, then plot it with `plotly.plotly.iplot`.
#
# Here is an example of a simple plotly figure. You can find more examples in [our online python documentation](https://plotly.com/python).

# In[46]:


# Import only the graph objects this cell uses, rather than a wildcard that
# would flood the notebook namespace.
from plotly.graph_objs import Bar, Layout

py.iplot({
    'data': [
        Bar(**{
            'x': [1, 2, 3],
            'y': [3, 1, 5],
            'name': 'first trace',
            'type': 'bar'
        }),
        Bar(**{
            'x': [1, 2, 3],
            'y': [4, 3, 6],
            'name': 'second trace',
            'type': 'bar'
        })
    ],
    'layout': Layout(**{
        'title': 'simple example'
    })
}, filename='cufflinks/simple-plotly-example')


# `cufflinks` generates these figures that describe plotly graphs.
# For example, this graph:

# In[48]:


df.iplot(kind='scatter', filename='cufflinks/simple-scatter-example')


# has this description:

# In[49]:


# `asFigure=True` returns the figure object instead of plotting it.
figure = df.iplot(kind='scatter', asFigure=True)
# Call form of print: valid on both Python 2 and Python 3.
print(figure.to_string())


# So, if you want to edit any attribute of a Plotly graph from cufflinks, first convert it to a figure and then edit the figure objects. Let's add a yaxis title, tick prefixes, and new legend names to this example:

# In[50]:


figure['layout']['yaxis1'].update({'title': 'Price', 'tickprefix': '$'})
# Rename every trace so the legend reads "Trace 0", "Trace 1", ...
for i, trace in enumerate(figure['data']):
    trace['name'] = 'Trace {}'.format(i)

py.iplot(figure, filename='cufflinks/customized-chart')


# [See more examples of Plotly graphs](https://plotly.com/python/) or [view the entire reference of valid attributes](https://plotly.com/python/reference/)

# #### Cufflinks Reference

# Cufflinks is [open source on github](https://github.com/santosjorge/cufflinks)!

# In[52]:


help(df.iplot)


# In[6]:


#!pip install git+https://github.com/plotly/publisher.git --upgrade
import publisher
publisher.publish(
    'cufflinks.ipynb', 'ipython-notebooks/cufflinks/',
    'Cufflinks - Easy Pandas DataFrame Graphing with Plotly | plotly',
    'An overview of cufflinks, a library for easy interactive Pandas charting with Plotly.',
    title='Cufflinks - Easy Pandas DataFrame Graphing with Plotly | plotly',
    name='Cufflinks',
    thumbnail='thumbnail/line-plot.jpg', language='python',
    ipynb='~notebook_demo/3')


# In[ ]: