#!/usr/bin/env python
# coding: utf-8

# # Simpson paradoxes over time
#
# Copyright 2021 Allen B. Downey
#
# License: [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/)
#
# [Click here to run this notebook on Colab](https://colab.research.google.com/github/AllenDowney/ProbablyOverthinkingIt2/blob/master/simpson_wages.ipynb)

# In[1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# In[2]:

# run_subgroups, summarize and visualize are not defined in this file, so
# they have to come from this wildcard import.
from simpson import *


def _decorate_and_save(title, ylabel, legend_title, filename):
    """Apply the shared title/axis/legend styling to the current figure and save it.

    Every section of this script finishes its plot the same way; only the
    four arguments differ.

    title: question text, drawn left-aligned above the axes
    ylabel: label for the y-axis
    legend_title: heading shown on the legend
    filename: path handed to plt.savefig
    """
    # Blank the default-position title before drawing the left-aligned one
    # (presumably visualize() sets a title of its own -- confirm).
    plt.title('')
    plt.title(title, loc='left', fontdict=dict(fontsize=14))
    plt.ylabel(ylabel)
    plt.xlabel('Year')

    # The legend's upper-left corner is pinned at axes coordinates
    # (1.02, 1.02), i.e. just outside the top-right corner of the plot.
    x = y = 1.02
    plt.legend(title=legend_title,
               bbox_to_anchor=(x, y),
               loc='upper left',
               ncol=1)
    plt.tight_layout()
    plt.savefig(filename)


# In[3]:

gss = pd.read_hdf('gss_simpson', 'gss')


# Would you say that most of the time people try to be helpful, or that they
# are mostly just looking out for themselves?

# In[4]:

xvarname = 'year'
yvarname = 'helpful'
gvarname = 'cohort10'

# No yvalue is passed for this variable, so whatever default run_subgroups
# and summarize define is used.
run_subgroups(gss, xvarname, yvarname, gvarname)

# In[5]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname)
# Remove the columns labeled 1890 and 1990 before plotting
# (presumably the sparsest birth cohorts -- confirm).
table.drop([1890, 1990], axis=1, inplace=True)
table

# In[6]:

visualize(series_all, table)

# The trailing whitespace in the title strings below is kept exactly as it
# was in the original triple-quoted literals.
title = (
    "Would you say that most of the time people try to be helpful, "
    "or that they are mostly just looking out for themselves? "
)
_decorate_and_save(title,
                   'Percent saying "helpful"',
                   'Birth decade',
                   'helpful_vs_year_by_cohort10.jpg')


# ## trust
#
# Generally speaking, would you say that most people can be trusted or that
# you can't be too careful in dealing with people?

# In[7]:

xvarname = 'year'
yvarname = 'trust'
gvarname = 'cohort10'
# Response code handed to run_subgroups/summarize (presumably the answer
# that is counted in the plotted percentage -- confirm in simpson.py).
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[8]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
table.drop([1890, 1990], axis=1, inplace=True)
table

# In[9]:

visualize(series_all, table)

title = (
    "Generally speaking, would you say that most people can be trusted "
    "or that you can't be too careful in dealing with people? \n"
)
_decorate_and_save(title,
                   'Percent saying "can be trusted"',
                   'Birth decade',
                   'trust_vs_year_by_cohort10.jpg')


# Do you think most people would try to take advantage of you if they got a
# chance, or would they try to be fair?

# In[10]:

xvarname = 'year'
yvarname = 'fair'
gvarname = 'cohort10'
yvalue = 2

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[11]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
table.drop([1890, 1990], axis=1, inplace=True)
table

# In[12]:

visualize(series_all, table)

title = (
    "Do you think most people would try to take advantage of you "
    "if they got a chance, or would they try to be fair? "
)
_decorate_and_save(title,
                   'Percent saying "would try to be fair"',
                   'Birth decade',
                   'fair_vs_year_by_cohort10.jpg')


# Is there any area right around here--that is, within a mile--where you
# would be afraid to walk alone at night?

# In[13]:

xvarname = 'year'
yvarname = 'fear'
gvarname = 'cohort10'
yvalue = 2

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[14]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
table.drop([1890, 1990], axis=1, inplace=True)
table

# In[15]:

visualize(series_all, table)

title = (
    "Is there any area right around here--that is, within a mile-- "
    "where you would be afraid to walk alone at night? \n"
)
_decorate_and_save(title,
                   'Percent saying "no"',
                   'Birth decade',
                   'fear_vs_year_by_cohort10.jpg')


# ## happy

# In[16]:

xvarname = 'year'
yvarname = 'happy'
gvarname = 'cohort10'
# Two response codes are passed here; the y-label below describes them as
# "very happy" or "pretty happy".
yvalue = [1, 2]

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[17]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
table.drop([1890, 1990], axis=1, inplace=True)
table

# In[18]:

visualize(series_all, table)

title = (
    "Taken all together, how would you say things are these days-- "
    "would you say that you are very happy, pretty happy, or not too happy? "
)
_decorate_and_save(title,
                   'Percent saying "very happy" or "pretty happy"',
                   'Birth decade',
                   'happy_vs_year_by_cohort10.jpg')


# ## pornlaw
#
# https://gss.norc.org/Documents/quex/GSS2018%20Ballot%202%20-%20English.pdf
#
#
# Which of these statements comes closest to your feelings about pornography
# laws?
#
# 1. There should be laws against the distribution of pornography, whatever
#    the age, or
#
# 2. There should be laws against the distribution of pornography to persons
#    under 18, or
#
# 3. There should be no laws forbidding the distribution of pornography

# In[19]:

xvarname = 'year'
yvarname = 'pornlaw'
gvarname = 'cohort10'
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[20]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
table.drop([1890, 1990], axis=1, inplace=True)
table

# In[21]:

visualize(series_all, table)

title = (
    "Do you think there should be laws against the distribution "
    "of pornography, whatever the age? \n"
)
_decorate_and_save(title,
                   'Percent who agree',
                   'Birth decade',
                   'pornlaw_vs_year_by_cohort10.jpg')


# Do you think the use of marijuana should be made legal or not?

# In[22]:

xvarname = 'year'
# This used to be 'fair' (left over from the earlier section), which made the
# marijuana-titled figure plot the wrong variable. The output filename below
# already says 'grass'; assumes that column exists in gss_simpson -- confirm.
yvarname = 'grass'
gvarname = 'cohort10'
# NOTE(review): check that code 1 of `grass` is the response the y-label
# 'No legal' below is meant to describe; adjust yvalue or the label if not.
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[23]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
table.drop([1890, 1990], axis=1, inplace=True)
table

# In[24]:

visualize(series_all, table)

title = "Do you think the use of marijuana should be made legal or not? "
_decorate_and_save(title,
                   'Percent saying "No legal"',
                   'Birth decade',
                   'grass_vs_year_by_cohort10.jpg')


# Please tell me whether or not you think it should be possible for a
# pregnant woman to obtain a legal abortion if she is married and does not
# want any more children?

# In[25]:

xvarname = 'year'
yvarname = 'abnomore'
gvarname = 'degree5'
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[26]:

# Same variables as the previous cell, restricted to survey years up to and
# including 2002. A copy is passed so the callee never works on a slice of gss.
pre2002 = gss['year'] <= 2002
run_subgroups(gss[pre2002].copy(), xvarname, yvarname, gvarname, yvalue)

# In[27]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
# Reorder the columns by their value in the row labeled 2017, largest first
# (presumably so the legend order follows the lines at the right edge).
table.sort_values(by=2017, axis=1, ascending=False, inplace=True)
table

# In[28]:

visualize(series_all, table)

title = (
    "Do you think it should be possible for a pregnant woman to obtain "
    "a legal abortion if she is married and does not want any more children? \n"
)
_decorate_and_save(title,
                   'Percent saying yes',
                   'Highest degree',
                   'abnomore_vs_year_by_degree.jpg')


# Suppose an admitted Communist wanted to make a speech in your community.
# Should he be allowed to speak, or not?

# In[29]:

xvarname = 'year'
yvarname = 'spkcom'
gvarname = 'degree5'
yvalue = 1

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[30]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
table.sort_values(by=2017, axis=1, ascending=False, inplace=True)
table

# In[31]:

visualize(series_all, table)

title = (
    "Suppose an admitted Communist wanted to make a speech in your community. "
    "Should he be allowed to speak, or not? "
)
_decorate_and_save(title,
                   'Percent saying "allowed"',
                   'Highest degree',
                   'spkcom_vs_year_by_degree.jpg')


# There are always some people whose ideas are considered bad or dangerous by
# other people. For instance, somebody who is against all churches and
# religion . . .
#
# If some people in your community suggested that a book he wrote against
# churches and religion should be taken out of your public library, would you
# favor removing this book, or not?

# In[32]:

xvarname = 'year'
yvarname = 'libath'
gvarname = 'degree5'
yvalue = 2

run_subgroups(gss, xvarname, yvarname, gvarname, yvalue)

# In[33]:

series_all, table = summarize(gss, xvarname, yvarname, gvarname, yvalue)
table.sort_values(by=2017, axis=1, ascending=False, inplace=True)
table

# In[34]:

visualize(series_all, table)

title = (
    "If people object to a book by someone who is opposed to churches "
    "and religion, should it be removed from a public library, or not? \n"
)
_decorate_and_save(title,
                   'Percent saying "not removed"',
                   'Highest degree',
                   'libath_vs_year_by_degree.jpg')