#!/usr/bin/env python
# coding: utf-8

# ***
# # 数据科学的编程工具
# > # Python使用简介
# ***
# 
# 王成军
# 
# wangchengjun@nju.edu.cn
# 
# 计算传播网 http://computational-communication.com

# # 人生苦短，我用Python。
# 
# Python（/ˈpaɪθən/）是一种面向对象、解释型计算机程序设计语言
# - 由Guido van Rossum于1989年底发明
# - 第一个公开发行版发行于1991年
# - Python语法简洁而清晰
# - 具有强大的标准库和丰富的第三方模块
# - 它常被昵称为胶水语言
# - TIOBE编程语言排行榜“2010年度编程语言”
# 
# 

# # 特点
# - 免费、功能强大、使用者众多
# - 与R和MATLAB相比，Python是一门更易学、更严谨的程序设计语言。使用Python编写的脚本更易于理解和维护。
# - 如同其它编程语言一样，Python语言的基础知识包括：类型、列表（list）和元组（tuple）、字典（dictionary）、条件、循环、异常处理等。
# - 关于这些，初阶读者可以阅读《Beginning Python》一书（Hetland, 2005）。
# 

# ## Python中包含了丰富的类库。
# 众多开源的科学计算软件包都提供了Python的调用接口，例如著名的计算机视觉库OpenCV。
# Python本身的科学计算类库发展也十分完善，例如NumPy、SciPy和matplotlib等。
# 就社会网络分析而言，igraph, networkx, graph-tool, Snap.py等类库提供了丰富的网络分析工具

# ## Python软件与IDE
# 目前最新的Python版本为3.0，更稳定的版本为2.7。
# 编辑器是编写程序的重要工具。
# 免费的Python编辑器与IDE有Spyder、PyCharm(免费社区版)、IPython、Vim、Emacs、Eclipse(加上PyDev插件)。
# 

# # Installing Anaconda Python
# - Use the Anaconda Python
#     - http://continuum.io/downloads.html

# # 第三方包可以使用pip install的方法安装。
# - 可以点击 Tools → Open command prompt
# - 然后在打开的命令窗口中输入：
#     - pip install beautifulsoup4 
# 

# > pip install beautifulsoup4

# - NumPy /SciPy for scientific computing
# - pandas to make Python usable for data analysis
# - matplotlib to make graphics
# - scikit-learn for machine learning
# 

# In[90]:


import random, datetime
import numpy as np
import pylab as plt
import statsmodels.api as sm
from scipy.stats import norm
from scipy.stats.stats import pearsonr


# # Variable Type
# 

# In[93]:


# Type conversion: build a str from an int.
str(3)


# In[193]:


# Parse a decimal string into an int.
int('5')


# In[192]:


# Parse a numeric string into a float.
float('7.1')


# In[194]:


# With a single argument, range counts from 0 up to (but not including) the
# stop value, i.e. 0..9 here.
range(10)


# In[190]:


# With two arguments the first one is the start, so this covers 1..9.
range(1, 10)


# # dir & help
# 
# 当你想要了解对象的详细信息时使用

# In[96]:


# The bare name evaluates to the built-in function object itself.
dir


# In[99]:


# dir(obj) lists the attribute names of obj; keep only the last five.
dir(str)[-5:]


# In[23]:


# help(obj) prints the documentation of obj.
help(str)


# In[25]:


# Sample string with surrounding whitespace and mixed case; the string-method
# cells below all operate on it.
x = ' Hello WorlD  '
dir(x)[-10:]


# In[111]:


# lower: copy of x with every letter lower-cased (x itself is not modified)
x.lower()


# In[112]:


# upper: copy of x with every letter upper-cased
x.upper()


# In[113]:


# rstrip: drop trailing whitespace only
x.rstrip()


# In[115]:


# strip: drop leading and trailing whitespace
x.strip()


# In[26]:


# replace: substitute every occurrence of 'lo' with the empty string
x.replace('lo', '')


# In[27]:


# split: cut the string at every occurrence of the separator 'lo'
x.split('lo')


# In[28]:


# join: glue the list items together, using ',' as the separator
','.join(['a', 'b'])


# # type
# Use type() when you want to know the type of a variable.

# In[100]:


x = 'hello world'
type(x)


# # Data Structure
# list, tuple, set, dictionary, array
# 

# In[119]:


# One literal for each of the basic containers used in this tutorial.
l = [1, 2, 3, 3]  # list: ordered, mutable, keeps duplicates
t = (1, 2, 3, 3)  # tuple: ordered, immutable
s = {1, 2, 3, 3}  # set: duplicates collapse to a single element
d = {'a': 1, 'b': 2, 'c': 3}  # dict: key -> value mapping
a = np.array(l)  # NumPy array built from the list
# Join the str() of every object with spaces: this prints the same
# space-separated line on Python 2 and on Python 3, whereas the bare
# `print l, t, ...` statement is a SyntaxError on Python 3.
print(' '.join(str(obj) for obj in (l, t, s, d, a)))


# In[182]:


l = [1,2,3,3] # list
l.append(4)  # append mutates the list in place, adding 4 at the end
l


# In[1]:


# keys(): the keys of the dict (a list on Python 2, a view object on Python 3)
d = {'a':1,'b':2,'c':3} # dict
d.keys()


# In[2]:


# values(): the values of the dict
d = {'a':1,'b':2,'c':3} # dict
d.values()


# In[185]:


# d[key]: look up the value stored under a single key
d = {'a':1,'b':2,'c':3} # dict
d['b']


# In[187]:


# items(): the (key, value) pairs of the dict
d = {'a':1,'b':2,'c':3} # dict
d.items()


# # 定义函数

# In[4]:


def devidePlus(m, n):  # the def line ends with a colon
    """Return m divided by n, plus one, as a float.

    m is passed through float() before dividing, so integer arguments are
    not floor-divided on Python 2.  A zero n raises ZeroDivisionError.
    """
    quotient = float(m) / n  # note: the body is indented under the def line
    return quotient + 1      # note: the result is handed back with return


# ## For 循环

# In[188]:


# range(10) covers the integers 0 through 9.
range(10)


# In[189]:


# range(1, 10) starts at 1 instead of 0 and still stops before 10.
range(1, 10)  


# In[123]:


# Print i, ten times i and i squared on one space-separated line.
for i in range(10):
    # Formatting the line explicitly gives identical output on Python 2 and
    # Python 3; the bare `print a, b, c` statement is Python 2 only, and
    # print(a, b, c) would print a tuple on Python 2.
    print('%d %d %d' % (i, i*10, i**2))


# In[122]:


# Print the multiples of ten from 0 to 90, one per line.
for i in range(10):
    print(i*10)


# In[5]:


# Call devidePlus for every i in 0..9 and print each result.
# print(...) with a single argument behaves the same on Python 2 and 3.
for i in range(10):
    print(devidePlus(i, 2))


# In[121]:


# List comprehension: the same loop written inline, collecting the results
# in a list instead of printing them.
r = [devidePlus(i, 2) for i in range(10)]
r


# # map

# In[21]:


# Given several sequences, map calls the function with one element taken from
# each sequence per call.  On Python 2 the result is a list; on Python 3 map
# is lazy and has to be wrapped in list(...) to display the values.
map(devidePlus, [4,3,2], [2, 1, 5])
# Note: (4, 2) is evaluated as one pair, (3, 1) as the next pair, and so on.


# In[20]:


# Element-wise sum of two sequences using a two-argument lambda.
map(lambda x, y: x + y, [1, 3, 5, 7, 9], [2, 4, 6, 8, 10])


# In[22]:


# The same idea with three sequences: x + y - z for each aligned triple.
map(lambda x, y, z: x + y - z, [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [3, 3, 2, 2, 5])


# # if elif else

# In[132]:


j = 3
# Take the remainder modulo 3 so that every branch below can actually run:
# a remainder modulo 2 is only ever 0 or 1, which made the "== 2" branch
# unreachable.
remainder = j % 3
if remainder == 1:
    print(r'余数是1')
elif remainder == 2:
    print(r'余数是2')
else:
    # Only a remainder of 0 is left for this branch.
    print(r'余数既不是1也不是2')


# In[131]:


x = 5
# Compare x with 5 and pick the matching (y, z) pair.
if x == 5:
    y, z = 0, 10
elif x < 5:
    y, z = -1, 5
else:
    y, z = 1, 11
print(x, y, z)


# # while循环

# In[129]:


# Count from 0 to 9.  print(...) with a single argument behaves the same on
# Python 2 and Python 3; the bare print statement is Python 2 only.
j = 0
while j < 10:
    print(j)
    j += 1  # advance the counter, otherwise the loop never ends


# In[128]:


# Print the squares of the odd numbers below 10.
j = 0
while j < 10:
    if j % 2 != 0:
        print(j**2)
    j += 1  # advance the counter, otherwise the loop never ends


# In[127]:


# Same as above with a limit of 50, but break leaves the loop as soon as j
# reaches 30, before the loop condition itself turns false.
j = 0
while j < 50:
    if j == 30:
        break
    if j % 2 != 0:
        print(j**2)
    j += 1  # advance the counter, otherwise the loop never ends
    

# In[1]:


a = 4
# `while a` tests the truthiness of a.  An int is falsy only when it is 0, so
# the loop prints 4, 3, 2, 1 and stops once a has counted down to 0.  Other
# falsy values such as None or [] would end the loop in the same way.  The
# former `if a < 0` reset could never run, because the loop exits at 0.
while a:
    print(a)
    a -= 1


# # try except 

# In[130]:


for i in [2, 0, 5]:
    try:
        print(devidePlus(4, i))
    except ZeroDivisionError as e:
        # Dividing by i == 0 fails: report the error and carry on with the
        # next value instead of aborting the whole loop.  `except ... as e`
        # is valid on Python 2.6+ and Python 3, unlike `except Exception, e`.
        print(e)


# In[1]:


alist = [[1,1], [0, 0, 1]]
for aa in alist:
    # The try wraps the whole inner loop, so the first failing element
    # abandons the rest of that sub-list.
    try:
        for a in aa:
            print(10 / a)
    except ZeroDivisionError as e:
        print(e)


# In[2]:


alist = [[1,1], [0, 0, 1]]
for aa in alist:
    for a in aa:
        # The try wraps a single element, so a failure only skips that one
        # element and the inner loop carries on with the next.
        try:
            print(10 / a)
        except ZeroDivisionError as e:
            print(e)


# # Write and Read data

# In[225]:


# Ten rows of [i, i squared, i cubed] for i = 0..9.
data = [[i, i**2, i**3] for i in range(10)]
data


# In[146]:


# Print every row as one tab-separated line.  print(...) with a single
# argument behaves the same on Python 2 and Python 3.
for i in data:
    print('\t'.join(map(str, i)))


# In[228]:


# data is a plain list (of lists).
type(data)


# In[226]:


# Number of rows.
len(data)


# In[227]:


# First row.
data[0]


# In[160]:


# Save the data
data = [[i, i**2, i**3] for i in range(10000)]

# Write one tab-separated line per row.  The with-statement closes the file
# even when a write fails, and text mode ("w") matches the 'r' mode the cells
# below use to read the file back (str data cannot be written to a "wb"
# handle on Python 3).
with open("/Users/chengjun/github/cjc2016/data/data_write_to_file.txt", "w") as f:
    for i in data:
        f.write('\t'.join(map(str, i)) + '\n')


# In[162]:


# readlines() returns all lines of the file as a list of strings.
with open('/Users/chengjun/github/cjc2016/data/data_write_to_file.txt','r') as f:
    data = f.readlines()
data[:5]


# In[176]:


# The argument of readlines is a size hint, not a line count: reading stops
# after roughly that much data has been consumed.
with open('/Users/chengjun/github/cjc2016/data/data_write_to_file.txt','r') as f:
    data = f.readlines(1000)
len(data)


# In[177]:


# readline() returns just the next line.  print(...) with a single argument
# behaves the same on Python 2 and Python 3.
with open('/Users/chengjun/github/cjc2016/data/data_write_to_file.txt','r') as f:
    print(f.readline())


# In[6]:


f = [1, 2, 3, 4, 5]
# enumerate pairs every element with its position, counting from 0.
for k, i in enumerate(f):
    # Explicit formatting prints "k i" on both Python 2 and Python 3.
    print('%s %s' % (k, i))


# In[9]:


# Iterating over a file object yields its lines one at a time.
# print(...) with a single argument behaves the same on Python 2 and 3.
with open('/Users/chengjun/github/cjc2016/data/data_write_to_file.txt','r') as f:
    for i in f:
        print(i)


# In[181]:


# enumerate supplies the line index, so only every 2000th line is printed.
with open('/Users/chengjun/github/cjc2016/data/data_write_to_file.txt','r') as f:
    for k, i in enumerate(f):
        if k % 2000 == 0:
            print(i)


# In[17]:


# Parse one tab-separated text line into a row of ints and store it.
data = []
line = '0\t0\t0\n'
# Drop the newline, split on tabs and convert every field from str to int.
line = [int(i) for i in line.replace('\n', '').split('\t')]
data.append(line)
data


# In[11]:


# Read the data: collect every line of the file as raw text.
data = []
with open('/Users/chengjun/github/cjc2016/data/data_write_to_file.txt','r') as f:
    for line in f:
        # Parsing is left disabled here, so each element stays an unparsed
        # line that still ends with its newline:
        #line = line.replace('\n', '').split('\t')
        #line = [int(i) for i in line]
        data.append(line)
data


# In[153]:


# Read the data and parse every line into a list of ints.
data = []
with open('/Users/chengjun/github/cjc2016/data/data_write_to_file.txt') as f:
    for line in f:
        # Drop the newline, split on tabs, convert each field to int.
        line = [int(i) for i in line.replace('\n', '').split('\t')]
        data.append(line)
data


# # 保存中间步骤产生的字典数据

# In[66]:


import json
# Serialise a small dict to a JSON file.
data_dict = {'a':1, 'b':2, 'c':3}
with open('/Users/chengjun/github/cjc2016/save_dict.json', 'w') as f:
    json.dump(data_dict, f)


# In[67]:


# Read the dict back.  Loading inside a with-statement closes the file handle
# as soon as the data has been read, instead of leaving it open.
with open("/Users/chengjun/github/cjc2016/save_dict.json") as f:
    dd = json.load(f)
dd


# ## 重新读入json

# # 保存中间步骤产生的列表数据

# In[68]:


# list(...) keeps this a real list on Python 3, where a bare range object is
# not JSON serializable; on Python 2 it is the same list as before.
data_list = list(range(10))
with open('/Users/chengjun/github/cjc2016/save_list.json', 'w') as f:
    json.dump(data_list, f)


# In[69]:


# Read the list back.  Loading inside a with-statement closes the file handle
# as soon as the data has been read, instead of leaving it open.
with open("/Users/chengjun/github/cjc2016/save_list.json") as f:
    dl = json.load(f)
dl


# # use dill to save data

# In[85]:


import dill # pip install dill
# http://trac.mystic.cacr.caltech.edu/project/pathos/wiki/dill
def myFunction(num):
    """Return a 2-tuple that holds num twice."""
    pair = (num, num)
    return pair

# Serialise the function object itself to a pickle file.
with open('/Users/chengjun/github/cjc2016/data.pkl', 'wb') as f:
    dill.dump(myFunction, f)


# In[86]:


# The pickle was written in binary mode, so it has to be read back in binary
# mode too ('rb', not 'r'): text mode can corrupt the byte stream on Windows
# and fails outright on Python 3.
with open('/Users/chengjun/github/cjc2016/data.pkl', 'rb') as f:
    newFunction = dill.load(f)
newFunction('hello')


# http://stackoverflow.com/questions/35603979/pickling-defaultdict-with-lambda

# # 使用matplotlib绘图

# In[2]:


# Run the IPython line magic `%matplotlib inline`.
get_ipython().run_line_magic('matplotlib', 'inline')

import matplotlib.pyplot as plt
# y = x**-3 for x = 1..99, drawn as a blue line with square markers on
# log-log axes.
x = range(1, 100)
y = [i**-3 for i in x]
plt.plot(x, y, 'b-s')
plt.ylabel('$p(k)$', fontsize = 20)
plt.xlabel('$k$', fontsize = 20)
plt.xscale('log')
plt.yscale('log')
plt.title('Degree Distribution')
plt.show()


# In[3]:


import numpy as np
# red dashes, blue squares and green triangles
t = np.arange(0., 5., 0.2)
plt.plot(t, t, 'r--')
plt.plot(t, t**2, 'bs')
plt.plot(t, t**3, 'g^')
plt.show()


# In[63]:


# Three power curves with markers joined by lines: blue squares (t**2),
# red circles (t**2.5) and green triangles (t**3), each with a legend label.
t = np.arange(0., 5., 0.2)
plt.plot(t, t**2, 'b-s', label = '1')
plt.plot(t, t**2.5, 'r-o', label = '2')
plt.plot(t, t**3, 'g-^', label = '3')
# Text placed at xytext with an arrow pointing at the data point xy.
plt.annotate(r'$\alpha = 3$', xy=(3.5, 40), xytext=(2, 80),
            arrowprops=dict(facecolor='black', shrink=0.05),
            fontsize = 20)
plt.ylabel('$f(t)$', fontsize = 20)
plt.xlabel('$t$', fontsize = 20)
plt.legend(loc=2,numpoints=1,fontsize=10)
plt.show()
# Optional: save the figure to disk (left disabled).
# plt.savefig('/Users/chengjun/GitHub/cjc2016/figure/save_figure.png',
#             dpi = 300, bbox_inches="tight",transparent = True)


# In[55]:


# Four panels in a 2x2 grid; subplot(22k) selects the k-th panel.  Every panel
# plots a different power of t (the array defined in the previous cell) and
# labels it with the exponent, placed at 80% of the curve's maximum.
plt.figure(1)
plt.subplot(221)
plt.plot(t, t, 'r--')
plt.text(2, 0.8*np.max(t), r'$\alpha = 1$', fontsize = 20)
plt.subplot(222)
plt.plot(t, t**2, 'bs')
plt.text(2, 0.8*np.max(t**2), r'$\alpha = 2$', fontsize = 20)
plt.subplot(223)
plt.plot(t, t**3, 'g^')
plt.text(2, 0.8*np.max(t**3), r'$\alpha = 3$', fontsize = 20)
plt.subplot(224)
plt.plot(t, t**4, 'r-o')
plt.text(2, 0.8*np.max(t**4), r'$\alpha = 4$', fontsize = 20)
plt.show()


# In[4]:


def f(t):
    """Return the damped cosine exp(-t) * cos(2*pi*t), evaluated with NumPy."""
    envelope = np.exp(-t)
    oscillation = np.cos(2*np.pi*t)
    return envelope * oscillation

# Two sample grids over [0, 5): a coarse one (step 0.1) and a fine one
# (step 0.02).
t1 = np.arange(0.0, 5.0, 0.1)
t2 = np.arange(0.0, 5.0, 0.02)


# In[5]:


# Two panels stacked vertically (2 rows, 1 column).
plt.figure(1)
plt.subplot(211)
# Top panel: f on the coarse grid as blue dots, on the fine grid as a black line.
plt.plot(t1, f(t1), 'bo')
plt.plot(t2, f(t2), 'k')

# Bottom panel: the undamped cosine as a red dashed line.
plt.subplot(212)
plt.plot(t2, np.cos(2*np.pi*t2), 'r--')
plt.show()


# In[25]:


import matplotlib.gridspec as gridspec
t = np.arange(0., 5., 0.2)

# A 3x3 grid whose cells are combined into panels of different sizes by
# slicing the GridSpec.
gs = gridspec.GridSpec(3, 3)
ax1 = plt.subplot(gs[0, :])       # first row, all columns
plt.plot(t, t**2, 'b-s')
ax2 = plt.subplot(gs[1,:-1])      # second row, all columns but the last
plt.plot(t, t**2, 'g-s')
ax3 = plt.subplot(gs[1:, -1])     # last column, from the second row down
plt.plot(t, t**2, 'r-o')
ax4 = plt.subplot(gs[-1,0])       # last row, first column
plt.plot(t, t**2, 'g-^')
ax5 = plt.subplot(gs[-1,-2])      # last row, second-to-last column
plt.plot(t, t**2, 'b-<')
plt.tight_layout()


# In[221]:


def OLSRegressPlot(x,y,col,xlab,ylab):
    """Scatter y against x and overlay the fitted OLS regression line.

    The model is fitted with statsmodels on x plus a prepended constant
    column, so exactly two parameters (intercept and slope) are expected.
    The slope and R squared are shown in the legend.  Drawing happens on the
    current matplotlib axes; nothing is returned.

    x    -- regressor values; presumably a NumPy array, since the fitted
            line is computed as intercept + x*slope
    y    -- response values
    col  -- edge colour of the hollow scatter markers
    xlab -- x-axis label
    ylab -- y-axis label
    """
    design = sm.add_constant(x, prepend=True)
    fit = sm.OLS(y, design).fit()
    intercept, slope = fit.params
    legend_text = r'$\beta = %.2f, \,R^2 = %.2f$' % (slope, fit.rsquared)
    # Hollow markers for the observations, a red line for the fit.
    plt.scatter(x, y, s=60, facecolors='none', edgecolors=col)
    plt.plot(x, intercept + x*slope, "red", label=legend_text)
    plt.legend(loc='upper left', fontsize=16)
    plt.xlabel(xlab, fontsize=26)
    plt.ylabel(ylab, fontsize=26)

    
# In[222]:


# Synthetic data: 50 standard-normal x values and y = 3*x plus
# standard-normal noise.
x = np.random.randn(50)
y = np.random.randn(50) + 3*x
# NOTE(review): the result of pearsonr is neither stored nor displayed here
# (it is not the last expression of the cell) -- confirm whether it should be
# printed.
pearsonr(x, y)
fig = plt.figure(figsize=(10, 4),facecolor='white')
OLSRegressPlot(x,y,'RoyalBlue',r'$x$',r'$y$')
plt.show()


# In[206]:


fig = plt.figure(figsize=(7, 4),facecolor='white')
# Draw 5000 samples from a normal distribution (loc 10.0, scale 2.5) and
# estimate the mean and standard deviation back from the sample.
data = norm.rvs(10.0, 2.5, size=5000)
mu, std = norm.fit(data)
# density=True normalises the histogram so it is comparable with the pdf
# drawn below; it replaces the `normed` argument, which newer matplotlib
# releases no longer accept.
plt.hist(data, bins=25, density=True, alpha=0.6, color='g')
# Evaluate the fitted pdf across the x-range the histogram occupies.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'r', linewidth=2)
title = r"$\mu = %.2f, \,  \sigma = %.2f$" % (mu, std)
plt.title(title,size=16)
plt.show()


# In[223]:


from matplotlib.dates import WeekdayLocator, DayLocator, MONDAY, DateFormatter
from matplotlib.finance import quotes_historical_yahoo_ochl, candlestick_ochl

# Date range as (year, month, day) tuples, passed to the quote downloader for
# the ticker 'INTC'.
# NOTE(review): quotes_historical_yahoo_ochl and candlestick_ochl come from
# matplotlib.finance, which recent matplotlib releases no longer ship --
# confirm the installed version still provides it.
date1 = (2014, 2, 1)
date2 = (2014, 5, 1)
quotes = quotes_historical_yahoo_ochl('INTC', date1, date2)


# In[224]:


fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(1,1,1)
# Candlestick chart of the quotes: green candles for up, red for down.
candlestick_ochl(ax, quotes, width=0.8, colorup='green', colordown='r', alpha=0.8)
mondays = WeekdayLocator(MONDAY)    # major ticks on the mondays
alldays = DayLocator()              # minor ticks on the days
weekFormatter = DateFormatter('%b %d')  # e.g., Jan 12
ax.xaxis.set_major_locator(mondays)
ax.xaxis.set_minor_locator(alldays)
ax.xaxis.set_major_formatter(weekFormatter)
ax.autoscale_view()
# Rotate the tick labels of the current axes by 45 degrees and right-align them.
plt.setp( plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
plt.title(r'$Intel \,Corporation \,Stock \,Price$',size=16)
# Leave extra room at the bottom of the figure.
fig.subplots_adjust(bottom=0.2)
plt.show()


# # This is the end.
# > Thank you for your attention.