#!/usr/bin/env python
# coding: utf-8

# In[55]:


import plotly.graph_objs as go
import plotly.plotly as py
# from plotly import tools
from plotly.offline import init_notebook_mode
import plotly.offline as offline
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import rgb2hex


# In[56]:


# Initialise plotly's offline notebook mode (called through the `offline` module alias).
offline.init_notebook_mode(connected=False)


# In[57]:


# tools.set_credentials_file(username='your_username', api_key='your_apikey')


# In[58]:


# Load the raw survey export and lower-case every column name, so the rest of
# the script can address columns with lower-case keys.
survey_df = pd.read_csv('pythondevsurvey2017_raw_data.csv')
survey_df.columns = list(map(str.lower, survey_df.columns))
survey_df.head()


# In[59]:


def find_cols(df, kws):
    """Return the column names of ``df`` that contain every keyword in ``kws``.

    Args:
        df: Any object exposing an iterable ``columns`` attribute of strings
            (for example a pandas DataFrame).
        kws: An iterable of substrings that must all occur in a column name.
            A single string is treated as one keyword rather than being
            iterated character by character.

    Returns:
        A list of the matching column names, in the order they appear in
        ``df.columns``. An empty ``kws`` matches every column.
    """
    # A bare string would otherwise be iterated per character, which matches
    # any column that merely contains all of those characters somewhere.
    if isinstance(kws, str):
        kws = [kws]
    return [item for item in df.columns if all(w in item for w in kws)]


# ## 散点图/折线图

# In[60]:


# `mode` may be any combination of 'lines', 'markers' and 'text'.
# Colors are accepted in rgb and hexadecimal formats.
# For large amounts of data, go.Scattergl can be used for drawing.
x_values = np.arange(100)
y_values = np.random.rand(100)
trace1 = go.Scatter(
    x=x_values,
    y=y_values,
    mode='lines+markers',
    marker=dict(color='red'),
)
data = [trace1]
layout = dict(title='散点+折线')

# Build the figure from the single trace and render it inline.
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# ## 使用 Python 2 和 Python 3 的开发者的比例？

# In[61]:


# Answers to the "most used Python version" question and how often each occurs.
version_question = 'which version of python do you use the most?'
python_version = survey_df[version_question]
counts = python_version.value_counts()
counts


# In[62]:


# Pie chart of the answer counts per Python version.
labels, values = counts.index, counts.values
# Only the first two colours of the default 'rainbow' palette are used.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

trace = go.Pie(
    labels=labels,
    values=values,
    marker=dict(colors=colors),
    rotation=0,
    hoverinfo='label+value',
)
data = [trace]
layout = dict(title='Python 2 VS Python 3')
fig = go.Figure(data=data, layout=layout)

offline.iplot(fig, show_link=False)


# ## 做数据分析和机器学习的人中分别有多少人使用的是 Python 3？

# In[63]:


# Cross-tabulate the Python version against the "data analysis" and
# "machine learning" usage answers; normalize=True turns counts into
# fractions of the whole table.
ml_question = 'machine learning:\xa0what do you use python for?'
da_question = 'data analysis:\xa0what do you use python for?'
pyver_question = 'which version of python do you use the most?'

python_da_ml = survey_df[[ml_question, da_question, pyver_question]]
python_da = pd.crosstab(
    python_da_ml[pyver_question],
    python_da_ml[da_question],
    normalize=True,
)
python_ml = pd.crosstab(
    python_da_ml[pyver_question],
    python_da_ml[ml_question],
    normalize=True,
)


# In[64]:


# Put the two cross-tabulations side by side (one column block per usage).
da_ml = pd.concat((python_da, python_ml), axis='columns')
da_ml


# In[65]:


# Grouped bars: one trace per Python version over the columns of `da_ml`.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

python2, python3 = [
    go.Bar(x=da_ml.columns, y=da_ml.loc[version], name=version, marker={'color': color})
    for version, color in zip(['Python 2', 'Python 3'], colors)
]

data = [python2, python3]
fig = go.Figure(data=data)
offline.iplot(fig, show_link=False)


# ## 做数据分析和机器学习的人常用的框架？

# In[66]:


# Select the per-framework answer columns of the frameworks question.
# The phrase is passed as a one-element keyword list: `find_cols` iterates
# over `kws`, so a bare string would be matched character by character and
# could pull in unrelated columns.
cols = find_cols(survey_df, ['what framework(s) do you use in addition to python?'])
# The first matching column is skipped.
# NOTE(review): presumably the "none" option -- confirm against the data.
frameworks = survey_df[cols[1:]]
# Count the non-null answers per framework, most used first.
count_df = frameworks.count().sort_values(ascending=False)
# Column names look like "<option>:<question>"; keep only the option part.
count_df.index = [item.split(':')[0] for item in count_df.index]
count_df.head()


# In[67]:


# Vertical bar chart of the framework counts, one palette colour per bar.
colors = list(map(rgb2hex, sns.color_palette('rainbow', len(count_df))))
trace = go.Bar(
    x=count_df.index,
    y=count_df.values,
    marker=dict(color=colors),
)

data = [trace]
layout = dict(title='Framework Usage')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[68]:


# When the y-axis tick labels are too long, the `automargin` property of the
# layout's yaxis can be set to True; a custom margin can be given as well.
# `colors` is reused from the previous cell; everything is reversed before
# being handed to the horizontal bar trace.
trace = go.Bar(
    y=count_df.index[::-1],
    x=count_df.values[::-1],
    marker=dict(color=colors[::-1]),
    orientation='h',
)

data = [trace]
layout = go.Layout(
    title='Framework Usage',
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[69]:


def _first_crosstab_column_by_version(col):
    """Cross-tabulate `python_version` against `col` and keep the first column."""
    return pd.crosstab(index=python_version, columns=col).iloc[:, 0]


# One column per framework, one row per Python version ...
frameworks_pyver = frameworks.apply(_first_crosstab_column_by_version)
# ... scaled so that every framework column sums to 1.
frameworks_pyver = frameworks_pyver.div(frameworks_pyver.sum(axis=0), axis='columns')
# Keep only the part of each column name that precedes the first ':'.
frameworks_pyver.columns = [name.partition(':')[0] for name in frameworks.columns]
frameworks_pyver


# In[70]:


# Grouped vertical bars: Python 2 vs Python 3 share for every framework.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

py2, py3 = [
    go.Bar(
        x=frameworks_pyver.columns,
        y=frameworks_pyver.loc[version],
        marker={'color': color},
        name=version,
    )
    for version, color in zip(['Python 2', 'Python 3'], colors)
]

data = [py2, py3]
layout = go.Layout(title='Python 2 and Python 3 Usage among Frameworks')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[71]:


# Same comparison as horizontal bars; automargin is enabled on the y axis.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

py2, py3 = [
    go.Bar(
        y=frameworks_pyver.columns,
        x=frameworks_pyver.loc[version],
        marker={'color': color},
        orientation='h',
        name=version,
    )
    for version, color in zip(['Python 2', 'Python 3'], colors)
]

data = [py2, py3]
layout = go.Layout(
    title='Python 2 and Python 3 Usage among Frameworks',
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[72]:


import plotly.figure_factory as ff


# In[73]:


# Distribution plot of the per-framework shares, one group per Python version.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
version_labels = ['Python 2', 'Python 3']
version_shares = [frameworks_pyver.loc['Python 2'], frameworks_pyver.loc['Python 3']]
fig = ff.create_distplot(
    hist_data=version_shares,
    group_labels=version_labels,
    bin_size=0.05,
    colors=colors,
)
offline.iplot(fig, show_link=False)


# In[74]:


# Marker-only scatter of the Python 2 / Python 3 share per framework.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

py2, py3 = [
    go.Scatter(
        x=frameworks_pyver.columns,
        y=frameworks_pyver.loc[version],
        mode='markers',
        marker={'color': color},
        name=version,
    )
    for version, color in zip(['Python 2', 'Python 3'], colors)
]

data = [py2, py3]
layout = go.Layout(title='Python 2 and Python 3 Usage among Frameworks')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[75]:


# Same markers with the axes swapped: frameworks on the y axis (in reversed
# order) and the share on the x axis.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

py2, py3 = [
    go.Scatter(
        y=frameworks_pyver.columns[::-1],
        x=frameworks_pyver.loc[version][::-1],
        mode='markers',
        marker={'color': color},
        orientation='h',
        name=version,
    )
    for version, color in zip(['Python 2', 'Python 3'], colors)
]

data = [py2, py3]
layout = go.Layout(
    title='Python 2 and Python 3 Usage among Frameworks',
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[76]:


# Show the per-framework version shares.
frameworks_pyver


# In[77]:


# Reorder the framework columns by their 'Python 3' row, largest share first.
sorted_frameworks_pyver = frameworks_pyver.sort_values(
    by='Python 3',
    axis='columns',
    ascending=False,
)
sorted_frameworks_pyver


# In[78]:


# Horizontal marker plot of the sorted shares; the sorted order is reversed
# before being passed to the traces.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

py2, py3 = [
    go.Scatter(
        y=sorted_frameworks_pyver.columns[::-1],
        x=sorted_frameworks_pyver.loc[version][::-1],
        mode='markers',
        marker={'color': color},
        orientation='h',
        name=version,
    )
    for version, color in zip(['Python 2', 'Python 3'], colors)
]

data = [py2, py3]
layout = go.Layout(
    title='Python 2 and Python 3 Usage among Frameworks',
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# ## 做数据分析和机器学习的人常用的框架？

# In[79]:


# `cols` is computed here but the question column is addressed by its full
# name on the next line.
cols = find_cols(survey_df, ['use', 'python', 'most'])
uses = survey_df['what do you use python for the most?']


def _first_crosstab_column_by_use(col):
    """Cross-tabulate `uses` against `col` and keep the first column."""
    return pd.crosstab(index=uses, columns=col).iloc[:, 0]


# One row per main use of Python, one column per framework.
frameworks_uses = frameworks.apply(_first_crosstab_column_by_use)
# Keep only the part of each column name that precedes the first ':'.
frameworks_uses.columns = [name.partition(':')[0] for name in frameworks_uses.columns]
frameworks_uses.head()


# In[80]:


# Restrict the table to the data-analysis and machine-learning rows.
da_ml_rows = ['Data analysis', 'Machine learning']
da_ml_frameworks_uses = frameworks_uses.loc[da_ml_rows]


# In[81]:


# Valid values for `fill`: ['none', 'tozeroy', 'tozerox', 'tonexty', 'tonextx', 'toself', 'tonext']
colors = [rgb2hex(i) for i in sns.color_palette('rainbow')[:2]]

# Filled area traces, one per main use of Python. The legend names match the
# rows being plotted (they used to read 'Python 2' / 'Python 3').
da = go.Scatter(x=da_ml_frameworks_uses.columns, y=da_ml_frameworks_uses.loc['Data analysis'], fill='tozeroy', marker={'color': colors[0]}, name='Data analysis')
ml = go.Scatter(x=da_ml_frameworks_uses.columns, y=da_ml_frameworks_uses.loc['Machine learning'], fill='tozeroy', marker={'color': colors[1]}, name='Machine learning')

data = [da, ml]
layout = go.Layout(title='Frameworks Usage among Data Analysis and Machine Learning Developers')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[82]:


# Order the framework columns by their 'Data analysis' row, smallest first.
sorted_da_ml_frameworks_uses = da_ml_frameworks_uses.sort_values(by='Data analysis', axis=1, ascending=True)
colors = [rgb2hex(i) for i in sns.color_palette('rainbow')[:2]]

# Filled area traces over the sorted frameworks. The legend names match the
# rows being plotted (they used to read 'Python 2' / 'Python 3').
da = go.Scatter(x=sorted_da_ml_frameworks_uses.columns, y=sorted_da_ml_frameworks_uses.loc['Data analysis'], fill='tozeroy', marker={'color': colors[0]}, name='Data analysis')
ml = go.Scatter(x=sorted_da_ml_frameworks_uses.columns, y=sorted_da_ml_frameworks_uses.loc['Machine learning'], fill='tozeroy', marker={'color': colors[1]}, name='Machine learning')

data = [da, ml]
layout = go.Layout(title='Frameworks Usage among Data Analysis and Machine Learning Developers')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[83]:


# Horizontal grouped bars of the sorted framework counts per main use.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

da, ml = [
    go.Bar(
        y=sorted_da_ml_frameworks_uses.columns,
        x=sorted_da_ml_frameworks_uses.loc[use],
        marker={'color': color},
        orientation='h',
        name=use,
    )
    for use, color in zip(['Data analysis', 'Machine learning'], colors)
]

data = [da, ml]
layout = go.Layout(
    title='Frameworks Usage among Data Analysis and Machine Learning Developers',
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# ## 公司规模大小和是否使用 Python 3 的关系？

# In[84]:


# Team size (first column matching the keywords) against the Python version.
cols = find_cols(survey_df, ['how', 'many', 'people', 'project'])
team_scale = survey_df[cols[0]]
team_size_order = [
    '2-7 people',
    '8-12 people',
    '13-20 people',
    '21-40 people',
    'More than 40 people',
]
team_pyver = pd.crosstab(team_scale, python_version).reindex(team_size_order)
# Convert each row to shares, then order the rows by the Python 3 share.
# NOTE(review): this ordering replaces the team-size order set by the reindex above.
team_row_totals = team_pyver.sum(axis=1)
team_pyver_sorted = team_pyver.div(team_row_totals, axis=0).sort_values(by='Python 3', ascending=False)
team_pyver_sorted


# In[85]:


# Line + marker plot of the Python 3 share per team size.
# `colors` is not assigned in this cell; the value left by an earlier cell is used.
trace = go.Scatter(
    x=team_pyver_sorted.index,
    y=team_pyver_sorted['Python 3'],
    marker=dict(color=colors[0]),
    mode='lines+markers',
    line=dict(width=2),
)

data = [trace]
layout = go.Layout(title='Team scale VS Use ratio of Python 3')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# ## 开发者年龄和是否使用 Python 3 的关系？

# In[86]:


# Age range (first column matching the keywords) against the Python version,
# with every row converted to shares.
cols = find_cols(survey_df, ['age', 'range'])
age = survey_df[cols[0]]
age_pyver = pd.crosstab(index=age, columns=python_version)
age_row_totals = age_pyver.sum(axis=1)
age_pyver = age_pyver.div(age_row_totals, axis=0)
age_pyver


# In[87]:


# Line + marker plot of the Python 3 share per age range.
# `colors` is not assigned in this cell; the value left by an earlier cell is used.
trace = go.Scatter(
    x=age_pyver.index,
    y=age_pyver['Python 3'],
    marker=dict(color=colors[0]),
    mode='lines+markers',
    line=dict(width=2),
    name='Python 3',
)

data = [trace]
layout = go.Layout(title="The developers' age VS The use ratio of Python 3")
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[88]:


# Rows: (country, Python version) pairs; columns: age ranges.
country_age = pd.crosstab(
    [survey_df['what country do you live in?'], survey_df['which version of python do you use the most?']],
    survey_df['could you tell us your age range?'],
)
# Collapse the Python-version level so that each country has a single row.
# `DataFrame.sum(level=...)` is deprecated (and removed in pandas 2.0);
# grouping on the index level is the supported spelling.
country_age_total = country_age.groupby(level=0).sum()
country_age_total.head()


# In[89]:


# Countries ordered by their number of respondents aged '60 or older'.
sorted_country_age_total = country_age_total.sort_values(by='60 or older', ascending=False)
colors = [rgb2hex(i) for i in sns.color_palette('rainbow', 10)]
# Select the plotted column by label, the same one used for sorting, rather
# than by position, so the chart cannot drift if the column order changes.
top10_60_plus = sorted_country_age_total['60 or older'].iloc[:10]
trace = go.Bar(x=sorted_country_age_total.index[:10], y=top10_60_plus, marker={'color': colors})

data = [trace]
layout = {'title': 'Top 10 countries of # of the developers whose age are 60+'}
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[90]:


# Age-range shares (each row sums to 1) for three selected countries.
selected_countries = ['United States', 'India', 'China']
three_countries = country_age_total.loc[selected_countries]
three_countries_totals = three_countries.sum(axis=1)
three_countries = three_countries.div(three_countries_totals, axis=0)
three_countries


# In[91]:


# One bar trace per age-range column, coloured by its position in a
# seven-colour palette.
colors = list(map(rgb2hex, sns.color_palette('rainbow', 7)))

data = [
    go.Bar(
        x=three_countries.index,
        y=three_countries[age_range],
        marker=dict(color=colors[position]),
        name=age_range,
    )
    for position, age_range in enumerate(three_countries.columns)
]
layout = go.Layout(title="Age distribution of the developers who're from USA, India and China")
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# ## 使用 Python 3 和 Python 2 的开发者的国别分布？

# In[92]:


# Country of residence (first column matching the keywords) and the number
# of respondents per country, most frequent first.
cols = find_cols(survey_df, ['country', 'live'])
country_column = cols[0]
countries = survey_df[country_column]
count_countries = countries.value_counts(ascending=False)
countries.head()


# In[93]:


# Equivalent to sns.countplot
colors = list(map(rgb2hex, sns.color_palette('rainbow', 10)))

# Bars for the first ten entries of the per-country counts.
trace = go.Bar(
    x=count_countries.index[:10],
    y=count_countries[:10],
    marker=dict(color=colors),
)

data = [trace]
layout = go.Layout(title='Top 10 countries of # of the developers')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[94]:


# Python version counts per country, restricted to the ten most frequent
# countries and converted to per-country shares.
countries_pyver = pd.crosstab(index=countries, columns=python_version)
top10_names = countries.value_counts()[:10].index
top10_countries = countries_pyver.loc[top10_names]
top10_totals = top10_countries.sum(axis=1)
top10_countries = top10_countries.div(top10_totals, axis=0)
top10_countries.head()


# In[95]:


# Grouped bars: Python 2 vs Python 3 share for each of the top-10 countries.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

py2, py3 = [
    go.Bar(
        x=top10_countries.index,
        y=top10_countries[version],
        marker={'color': color},
        name=version,
    )
    for version, color in zip(['Python 2', 'Python 3'], colors)
]

data = [py2, py3]
layout = go.Layout(title='Python 2 and Python 3 Usage among Different Countries')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[96]:


# Per-country shares of each Python version (every row sums to 1).
countries_pyver_totals = countries_pyver.sum(axis=1)
countries_pyver_ratio = countries_pyver.div(countries_pyver_totals, axis=0)


# In[97]:


# Equivalent to sns.distplot
# Note: hist_data, group_labels and colors must all be lists, with one
# element per data set.
colors = [rgb2hex(i) for i in sns.color_palette('rainbow')[:2]]
fig = ff.create_distplot(hist_data=[countries_pyver_ratio['Python 3']], group_labels=['Python 3'], bin_size=0.05, colors=[colors[0]])
fig['layout'].update(title='Use ratio of Python 3 in the world')
# The distplot figure is rendered as built above. It must not be rebuilt from
# the `data` / `layout` variables, which still hold the previous cell's bar
# chart and would replace the distribution plot.
offline.iplot(fig, show_link=False)


# In[98]:


# Equivalent to sns.stripplot
# Every country's Python 3 share is drawn as a marker on a single
# 'Python 3' row; `colors` comes from an earlier cell.
row_labels = ['Python 3'] * len(countries_pyver_ratio)
trace = go.Scatter(
    x=countries_pyver_ratio['Python 3'],
    y=row_labels,
    mode='markers',
    marker=dict(color=colors[0]),
)
data = [trace]
fig = go.Figure(data=data)
offline.iplot(fig, show_link=False)


# In[99]:


# Violin plot of the per-country Python 3 share, written as a plain figure
# dict instead of graph objects. `colors` comes from an earlier cell.
fig = {
    "data": [{
        "type": 'violin',
        "y": countries_pyver_ratio['Python 3'],
        "box": {
            "visible": True
        },
        "line": {
            "color": 'black'
        },
        "meanline": {
            "visible": True
        },
        "fillcolor": colors[0],
        "opacity": 0.6,
        # Category label of the single violin; it names the plotted series
        # (it used to read 'Total Bill', a leftover from another data set).
        "x0": 'Python 3'
    }],
    "layout" : {
        "title": "Use ratio of Python 3 in the world",
        "yaxis": {
            "zeroline": False,
        }
    }
}

offline.iplot(fig, show_link=False)


# In[100]:


# The same violin built with graph objects, with the values on the x axis.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))

trace = go.Violin(
    x=countries_pyver_ratio['Python 3'],
    meanline=dict(visible=True),
    box=dict(visible=True),
    fillcolor=colors[0],
    opacity=0.6,
    line=dict(color='black'),
)
layout = go.Layout(
    title="Use ratio of Python 3 in the world",
    xaxis=dict(zeroline=False),
)

data = [trace]
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[101]:


# Projection types listed by the original author:
# ['equirectangular', 'mercator', 'orthographic', 'natural earth',
#  'kavrayskiy7', 'miller', 'robinson', 'eckert4', 'azimuthal equal area',
#  'azimuthal equidistant', 'conic equal area', 'conic conformal',
#  'conic equidistant', 'gnomonic', 'stereographic', 'mollweide', 'hammer',
#  'transverse mercator', 'albers usa', 'winkel tripel', 'aitoff',
#  'sinusoidal']

# World map coloured by each country's Python 3 share, expressed in percent.
data = [{
    'type': 'choropleth',
    'locations': countries_pyver_ratio.index,
    'locationmode': 'country names',
    'z': countries_pyver_ratio['Python 3'] * 100,
    'text': countries_pyver_ratio.index,
    'colorscale': 'Bluered',
    'autocolorscale': False,
    'reversescale': True,
    'marker': {
        'line': {
            'color': 'rgb(180,180,180)',
            'width': 0.5,
        },
    },
    'colorbar': {
        'ticksuffix': '%',
        'title': 'Percent',
    },
}]

layout = {
    'title': 'Python 3 in the world',
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {
            'type': 'equirectangular',
        },
    },
}

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# ## 开发者中使用 IDE 的情况？

# In[102]:


# Every column whose name contains both keywords, relabelled with the part
# of the name that precedes the first ':'.
cols = find_cols(survey_df, ['what', 'editor(s)/ide(s)'])
editors = survey_df[cols]
editors.columns = [label.partition(':')[0] for label in editors.columns]
editors.head()


# In[103]:


# Non-null answers per editor column, largest count first.
editor_counts = editors.count()
count_editors = editor_counts.sort_values(ascending=False)
count_editors.head()


# In[104]:


# Horizontal bars of the editor counts; index, values and colours are all
# reversed before being passed to the trace.
colors = list(map(rgb2hex, sns.color_palette('rainbow', len(count_editors))))
trace = go.Bar(
    y=count_editors.index[::-1],
    x=count_editors.values[::-1],
    marker=dict(color=colors[::-1]),
    orientation='h',
)

data = [trace]
layout = go.Layout(
    title="What editor(s)/IDE(s) have you considered for use in your Python development?",
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[105]:


# The same editor counts as vertical bars.
colors = list(map(rgb2hex, sns.color_palette('rainbow', len(count_editors))))
trace = go.Bar(
    x=count_editors.index,
    y=count_editors.values,
    marker=dict(color=colors),
    orientation='v',
)

data = [trace]
layout = go.Layout(
    title="What editor(s)/IDE(s) have you considered for use in your Python development?",
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[106]:


# Main editor (first column matching the keywords) and how often each answer
# occurs, most frequent first.
col = find_cols(survey_df, ['what', 'main', 'editor'])
main_editor_column = col[0]
main_editor = survey_df[main_editor_column]
count_main_editor = main_editor.value_counts(ascending=False)
count_main_editor.head()


# In[107]:


# Horizontal bars of the main-editor counts; index, values and colours are
# all reversed before being passed to the trace.
colors = list(map(rgb2hex, sns.color_palette('rainbow', len(count_main_editor))))
trace = go.Bar(
    y=count_main_editor.index[::-1],
    x=count_main_editor.values[::-1],
    marker=dict(color=colors[::-1]),
    orientation='h',
)

data = [trace]
layout = go.Layout(
    title="What is the main editor you use for your current python development?",
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[108]:


# Vertical bars of the main-editor counts. The palette is sized from
# `count_main_editor`, the series actually plotted, so there is exactly one
# colour per bar (it used to be sized from the unrelated `count_editors`).
colors = [rgb2hex(i) for i in sns.color_palette('rainbow', len(count_main_editor))]
trace = go.Bar(x=count_main_editor.index, y=count_main_editor.values, marker={'color': colors}, orientation='v')

data = [trace]
layout = go.Layout(
    title="What is the main editor you use for your current python development?",
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)


# In[ ]: