#!/usr/bin/env python
# coding: utf-8

# # Simpson paradoxes over time

# Copyright 2021 Allen B. Downey
# 
# License: [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/)
# 
# [Click here to run this notebook on Colab](https://colab.research.google.com/github/AllenDowney/ProbablyOverthinkingIt2/blob/master/simpson_wages.ipynb)

# 

# In[1]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# In[2]:


from simpson import *


# In[3]:


# Load the survey DataFrame stored under the key 'gss' in the local
# HDF5 file 'gss_simpson'; every analysis below reads from it.
gss = pd.read_hdf('gss_simpson', key='gss')


# Would you say that most of the time people try to be helpful, or that they are mostly just looking out for themselves?

# In[4]:


# Response variable 'helpful' versus survey year, grouped by 'cohort10'.
# No yvalue is passed here, so the helpers use their own default.
xvarname, yvarname, gvarname = 'year', 'helpful', 'cohort10'

run_subgroups(gss, xvarname, yvarname, gvarname)


# In[5]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname)
# Remove the 1890 and 1990 columns from the table before plotting.
table.drop(columns=[1890, 1990], inplace=True)
table


# In[6]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "Would you say that most of the time people try to be helpful, \n"
    "or that they are mostly just looking out for themselves?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent saying "helpful"')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Birth decade',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('helpful_vs_year_by_cohort10.jpg')


# In[ ]:


# ## trust
# 
# Generally speaking, would you say that most people can be trusted or that you can't be too careful in dealing with people?

# In[7]:


# Response variable 'trust' versus survey year, grouped by 'cohort10';
# yvalue selects the response code that is counted (1 here).
xvarname, yvarname, gvarname = 'year', 'trust', 'cohort10'
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[8]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Remove the 1890 and 1990 columns from the table before plotting.
table.drop(columns=[1890, 1990], inplace=True)
table


# In[9]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "Generally speaking, would you say that most people can be trusted \n"
    "or that you can't be too careful in dealing with people?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent saying "can be trusted"')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Birth decade',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('trust_vs_year_by_cohort10.jpg')


# In[ ]:


# In[ ]:


# Do you think most people would try to take advantage of you if they got a chance, or would they try to be fair?

# In[10]:


# Response variable 'fair' versus survey year, grouped by 'cohort10';
# yvalue selects the response code that is counted (2 here).
xvarname, yvarname, gvarname = 'year', 'fair', 'cohort10'
yvalue = 2

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[11]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Remove the 1890 and 1990 columns from the table before plotting.
table.drop(columns=[1890, 1990], inplace=True)
table


# In[12]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "Do you think most people would try to take advantage of you if they got a chance, \n"
    "or would they try to be fair?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent saying "would try to be fair"')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Birth decade',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('fair_vs_year_by_cohort10.jpg')


# In[ ]:


# Is there any area right around here--that is, within a mile--where you would be afraid to walk alone at night?

# In[13]:


# Response variable 'fear' versus survey year, grouped by 'cohort10';
# yvalue selects the response code that is counted (2 here).
xvarname, yvarname, gvarname = 'year', 'fear', 'cohort10'
yvalue = 2

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[14]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Remove the 1890 and 1990 columns from the table before plotting.
table.drop(columns=[1890, 1990], inplace=True)
table


# In[15]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "Is there any area right around here--that is, within a mile--\n"
    "where you would be afraid to walk alone at night?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent saying "no"')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Birth decade',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('fear_vs_year_by_cohort10.jpg')


# In[ ]:


# In[ ]:


# ## happy

# In[16]:


# Response variable 'happy' versus survey year, grouped by 'cohort10';
# yvalue lists the response codes that are counted together (1 and 2).
xvarname, yvarname, gvarname = 'year', 'happy', 'cohort10'
yvalue = [1, 2]

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[17]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Remove the 1890 and 1990 columns from the table before plotting.
table.drop(columns=[1890, 1990], inplace=True)
table


# In[18]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "Taken all together, how would you say things are these days--\n"
    "would you say that you are very happy, pretty happy, or not too happy?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent saying "very happy" or "pretty happy"')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Birth decade',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('happy_vs_year_by_cohort10.jpg')


# In[ ]:


# In[ ]:


# ## pornlaw
# 
# https://gss.norc.org/Documents/quex/GSS2018%20Ballot%202%20-%20English.pdf
# 
# 
# Which of these statements comes closest to your feelings about pornography laws?     
# 
# 1. There should be laws against the distribution of pornography, whatever the age, or
# 
# 2. There should be laws against the distribution of pornography to persons under 18, or
# 
# 3. There should be no laws forbidding the distribution of pornography

# In[19]:


# Response variable 'pornlaw' versus survey year, grouped by 'cohort10';
# yvalue selects the response code that is counted (1 here).
xvarname, yvarname, gvarname = 'year', 'pornlaw', 'cohort10'
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[20]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Remove the 1890 and 1990 columns from the table before plotting.
table.drop(columns=[1890, 1990], inplace=True)
table


# In[21]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "Do you think there should be laws against the distribution of pornography, \n"
    "whatever the age?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent who agree')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Birth decade',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('pornlaw_vs_year_by_cohort10.jpg')


# In[ ]:


# Do you think the use of marijuana should be made legal or not?

# In[22]:


# Marijuana-legalization question versus survey year, grouped by 'cohort10'.
#
# The response variable was previously 'fair', a copy-paste leftover from
# the earlier "fair" analysis: the title, y-label and output filename of
# this section all refer to marijuana legalization, so the figure was
# plotting the wrong variable.  It is now 'grass'.
# NOTE(review): assumes the 'gss' DataFrame has a 'grass' column -- confirm.
xvarname = 'year'
yvarname = 'grass'
gvarname = 'cohort10'
# NOTE(review): the y-label below reads 'Percent saying "No legal"'.  In the
# standard GSS codebook GRASS is presumably coded 1 = should be legal,
# 2 = should not be legal; confirm that yvalue matches the label (or that
# the dataset was recoded) before publishing the figure.
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[23]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Remove the 1890 and 1990 columns from the table before plotting.
table.drop([1890, 1990], axis=1, inplace=True)
table


# In[24]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = """Do you think the use of marijuana should be made legal or not?
"""
plt.title(title, loc='left', fontdict=dict(fontsize=14))
plt.ylabel('Percent saying "No legal"')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x = y = 1.02
plt.legend(title='Birth decade', bbox_to_anchor=(x, y), loc='upper left', ncol=1)
plt.tight_layout()
plt.savefig('grass_vs_year_by_cohort10.jpg')


# In[ ]:


# In[ ]:


# Please tell me whether or not you think it should be possible for a pregnant woman to obtain a legal abortion if she is married and does not want any more children?

# In[25]:


# Response variable 'abnomore' versus survey year, grouped by 'degree5';
# yvalue selects the response code that is counted (1 here).
xvarname, yvarname, gvarname = 'year', 'abnomore', 'degree5'
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[26]:


# Same analysis, restricted to rows whose 'year' is 2002 or earlier.
# The settings are restated so this cell can run on its own.
xvarname, yvarname, gvarname = 'year', 'abnomore', 'degree5'
yvalue = 1

pre2002 = gss['year'].le(2002)
run_subgroups(gss.loc[pre2002].copy(), xvarname, yvarname, gvarname, yvalue)


# In[27]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Order the columns by their value in the row labelled 2017, largest first.
table.sort_values(2017, axis='columns', ascending=False, inplace=True)
table


# In[28]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "Do you think it should be possible for a pregnant woman to obtain \n"
    "a legal abortion if she is married and does not want any more children?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent saying yes')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Highest degree',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('abnomore_vs_year_by_degree.jpg')


# Suppose an admitted Communist wanted to make a speech in your community. Should he be allowed to speak, or not?

# In[29]:


# Response variable 'spkcom' versus survey year, grouped by 'degree5';
# yvalue selects the response code that is counted (1 here).
xvarname, yvarname, gvarname = 'year', 'spkcom', 'degree5'
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[30]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Order the columns by their value in the row labelled 2017, largest first.
table.sort_values(2017, axis='columns', ascending=False, inplace=True)
table


# In[31]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "Suppose an admitted Communist wanted to make a speech in your community. \n"
    "Should he be allowed to speak, or not?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent saying "allowed"')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Highest degree',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('spkcom_vs_year_by_degree.jpg')


# In[ ]:


# There are always some people whose ideas are considered bad or dangerous by other people. For instance, somebody who is against all churches and religion . . .
# 
# If some people in your community suggested that a book he wrote against churches and religion should be taken out of your public library, would you favor removing this book, or not?

# In[32]:


# Response variable 'libath' versus survey year, grouped by 'degree5';
# yvalue selects the response code that is counted (2 here).
xvarname, yvarname, gvarname = 'year', 'libath', 'degree5'
yvalue = 2

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)


# In[33]:


series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Order the columns by their value in the row labelled 2017, largest first.
table.sort_values(2017, axis='columns', ascending=False, inplace=True)
table


# In[34]:


visualize(series_all, table)
# Blank the centered title so only the left-aligned one below is shown.
plt.title('')

title = (
    "If people object to a book by someone who is opposed to \n"
    "churches and religion, should it be removed from a public library, or not?\n"
)
plt.title(title, loc='left', fontdict={'fontsize': 14})
plt.ylabel('Percent saying "not removed"')
plt.xlabel('Year')

# Anchor the legend's upper-left corner just outside the axes.
x, y = 1.02, 1.02
plt.legend(
    title='Highest degree',
    bbox_to_anchor=(x, y),
    loc='upper left',
    ncol=1,
)
plt.tight_layout()
plt.savefig('libath_vs_year_by_degree.jpg')


# In[ ]:


# In[ ]: