#!/usr/bin/env python
# coding: utf-8

# ## pandasプロット(鋭意制作中)
# 

# In[2]:


# python用のパッケージ
import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Script-export form of the `%matplotlib inline` line magic.
# NOTE(review): get_ipython is not imported in this file, so this line presumably
# requires an IPython/Jupyter session to run — confirm before using as a plain script.
get_ipython().run_line_magic('matplotlib', 'inline')

# jupyter用のdisplayメソッド
from IPython.display import display, Latex, HTML, Math, JSON


# In[3]:


# Chapter 9 data, converted from R into JSON records (one {"x", "y"} object per row).
# The JSON text is wrapped in a file-like object because passing a literal JSON
# string straight to pd.read_json is deprecated in recent pandas releases.
# The u'' prefix keeps the literal a text string on Python 2 as well, which
# io.StringIO requires.
d = pd.read_json(io.StringIO(u'''
[{"x":3,"y":5},{"x":3.2105,"y":3},{"x":3.4211,"y":6},{"x":3.6316,"y":7},{"x":3.8421,"y":7},
    {"x":4.0526,"y":5},{"x":4.2632,"y":9},{"x":4.4737,"y":9},{"x":4.6842,"y":7},{"x":4.8947,"y":10},
    {"x":5.1053,"y":12},{"x":5.3158,"y":8},{"x":5.5263,"y":7},{"x":5.7368,"y":4},{"x":5.9474,"y":4},
    {"x":6.1579,"y":11},{"x":6.3684,"y":9},{"x":6.5789,"y":9},{"x":6.7895,"y":8},{"x":7,"y":6}]
'''))


# In[4]:


# Display pandas' descriptive statistics for the loaded frame; kept as a bare
# expression so the notebook renders it as the cell output.
d.describe()


# In[28]:


# Scatter plot of the raw observations: column y against column x.
d.plot(kind='scatter', x='x', y='y')
plt.show()


# In[27]:


# Histogram drawn through the generic DataFrame.plot entry point.
# NOTE(review): x= likely has no visible effect on a histogram of y — confirm
# whether it is intended.
d.plot(kind='hist', x='x', y='y')
plt.show()


# In[38]:


# Kernel density estimate drawn through the generic DataFrame.plot entry point.
# NOTE(review): x= likely has no visible effect on a density plot of y — confirm
# whether it is intended.
d.plot(kind='kde', x='x', y='y')
plt.show()


# In[19]:


# y takes non-negative discrete values, and its mean and variance are roughly
# equal (same order of magnitude).
# The Python 2 print statement is a SyntaxError on Python 3; formatting into a
# single string prints the same "mean variance" text on both interpreters.
print('{} {}'.format(d.y.mean(), d.y.var()))


# In[21]:


# statsmodelsを使ってglmを計算します
import statsmodels.formula.api as smf
import statsmodels.api as sm
# Import from the public scipy.stats namespace; the scipy.stats.stats submodule
# is a deprecated private location for the same function.
from scipy.stats import pearsonr


# In[23]:


# Poisson regression of y on x.
# The log link is the Poisson family's default, so it is left implicit here.
# The previous link=sm.families.links.log passed a link *class* under a
# deprecated lowercase alias, which current statsmodels rejects (it expects a
# link instance); relying on the default fits the same model on old and new
# releases alike.
fit = smf.glm('y ~ x', data=d, family=sm.families.Poisson()).fit()
# Bare expression so the notebook shows the fitted coefficients.
fit.params


# In[29]:


# Store the model's predicted λ for each row in a new 'yhat' column of d.
d['yhat'] = fit.predict()

# Draw the fitted curve first, then overlay the observed points on the same axes.
ax = d.plot.line(x='x', y='yhat')
d.plot(kind='scatter', x='x', y='y', ax=ax)
plt.show()


# In[35]:


# Plot the input data with seaborn's linear-regression helper (lmplot).
sns.lmplot(data=d, x='x', y='y')
plt.show()


# In[ ]: