#!/usr/bin/env python
# coding: utf-8

# 통계적 사고 (2판) 연습문제 ([thinkstats2.com](http://thinkstats2.com), [think-stat.xwmooc.org](http://think-stat.xwmooc.org))<br>
# Allen Downey / 이광춘(xwMOOC)

# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')
from __future__ import print_function

import math
import first
import chap07soln
import thinkstats2
import thinkplot
import numpy as np


# ## 연습문제 7.1
# 
# NSFG에서 나온 데이터를 사용해서, 출생체중과 산모연령의 산점도를 그리시오. 산모연령에 대한 출생체중 백분위수를 도식화하시오. 피어슨 상관과 스피어만 상관을 계산하시오. 두 변수 사이의 관계를 어떻게 특징지을 수 있을까?

# In[12]:


def ScatterPlot(ages, weights, alpha=1.0):
    """Draw a scatter plot of weights against ages.

    The points are drawn with thinkplot.Scatter and the axes are then
    configured with fixed labels and limits. Nothing is written to disk
    by this function.

    ages: sequence of float, plotted on the x axis
    weights: sequence of float, plotted on the y axis
    alpha: float, passed through to thinkplot.Scatter
    """
    thinkplot.Scatter(ages, weights, alpha=alpha)

    # Fixed axis settings: x spans 10-45, y spans 0-15, and the legend
    # is turned off.
    axis_options = {
        'xlabel': 'age (years)',
        'ylabel': 'weight (lbs)',
        'xlim': [10, 45],
        'ylim': [0, 15],
        'legend': False,
    }
    thinkplot.Config(**axis_options)

def BinnedPercentiles(df):
    """Plot the 75th, 50th and 25th weight percentiles per age bin.

    Rows are assigned to age bins whose edges run from 10 to 46 in
    steps of 3. For each bin the mean age and a Cdf of the weights are
    computed, then one line per percentile is drawn and the figure is
    shown.

    df: DataFrame with agepreg and totalwgt_lb columns
    """
    edges = np.arange(10, 48, 3)
    bin_index = np.digitize(df.agepreg, edges)

    mean_ages = []
    weight_cdfs = []
    for _, members in df.groupby(bin_index):
        mean_ages.append(members.agepreg.mean())
        weight_cdfs.append(thinkstats2.Cdf(members.totalwgt_lb))

    # Drop the first and last groups by position.
    # NOTE(review): presumably meant to discard the out-of-range bins
    # that digitize produces; if the data has no values outside the
    # edges, this removes in-range bins instead -- confirm.
    mean_ages = mean_ages[1:-1]
    weight_cdfs = weight_cdfs[1:-1]

    percents = [75, 50, 25]
    thinkplot.PrePlot(len(percents))
    for percent in percents:
        thinkplot.Plot(
            mean_ages,
            [cdf.Percentile(percent) for cdf in weight_cdfs],
            label='%dth' % percent,
        )

    thinkplot.Show(xlabel="mother's age (years)",
                   ylabel='birth weight (lbs)')


# In[ ]: