from __future__ import print_function, division
import nsfg3
import pandas
import numpy as np
def ReadFemResp1995(dat_file='1995FemRespData.dat.gz'):
    """Reads respondent data from NSFG Cycle 5.

    dat_file: path of the gzip-compressed, fixed-width respondent file;
              defaults to the Cycle 5 file name in the working directory

    returns: DataFrame with columns cmintvw, timesmar, cmmarrhx, cmbirth,
             finalwgt, plus the derived boolean column evrmarry
    """
    names = ['cmintvw', 'timesmar', 'cmmarrhx', 'cmbirth', 'finalwgt']

    # read_fwf treats each colspec as a half-open [start, end) interval with
    # 0-based offsets, so a 1-based inclusive field (first, last) is written
    # as (first - 1, last). Positions presumably come from the survey
    # codebook -- TODO confirm against the Cycle 5 documentation.
    colspecs = [(12360-1, 12363),
                (4637-1, 4638),
                (11759-1, 11762),
                (14-1, 16),
                (12350-1, 12359)]

    df = pandas.read_fwf(dat_file,
                         compression='gzip',
                         colspecs=colspecs,
                         names=names)

    # Codes 98 and 99 are turned into NaN so they are not counted as a
    # number of marriages. Assign the result back instead of calling
    # replace(..., inplace=True) on df.timesmar: the in-place form operates
    # on an intermediate Series and does not update df under pandas
    # Copy-on-Write.
    df['timesmar'] = df['timesmar'].replace([98, 99], np.nan)

    # NaN > 0 evaluates to False, so rows with a missing timesmar are
    # flagged as not ever married.
    df['evrmarry'] = (df.timesmar > 0)

    # The return value is not used; CleanData presumably modifies df in
    # place -- see nsfg3 for what it does.
    nsfg3.CleanData(df)
    return df
# Sanity checks on the Cycle 5 respondent data. Each result is printed so the
# checks still show something when the file runs as a script; the values
# recorded from the original notebook run are kept as comments for comparison.
df = ReadFemResp1995()

# Respondents whose evrmarry flag is False.
# Recorded output: 4006
print((~df.evrmarry).sum())

# Respondents with cmbirth in [604, 720] (presumably century-month codes --
# TODO confirm the encoding).
# Recorded output: 3918
print(len(df[(df.cmbirth >= 604) & (df.cmbirth <= 720)]))

# Respondents with cmmarrhx in [780, 840].
# Recorded output: 192
print(len(df[(df.cmmarrhx >= 780) & (df.cmmarrhx <= 840)]))

# Distribution of cmintvw values.
# Recorded output (value, count):
#   1141   514
#   1142  2448
#   1143  2466
#   1144  1692
#   1145  1381
#   1146  1076
#   1147   722
#   1148   172
#   1149    81
#   1150   295
#   Name: cmintvw, dtype: int64
print(df.cmintvw.value_counts().sort_index())

# Distribution of finalwgt values; every value shown occurred exactly once.
# Recorded output (value, count; ".." marks rows elided by pandas):
#   349.8761 1    356.8193 1    433.7413 1    434.1431 1    441.2415 1
#   463.0787 1    464.5584 1    468.2044 1    472.6256 1    482.7008 1
#   495.1924 1    498.6220 1    507.9773 1    511.4543 1    519.3578 1
#   523.6670 1    525.7335 1    555.8899 1    561.4898 1    561.6297 1
#   564.7645 1    567.7595 1    589.4533 1    590.1313 1    590.1364 1
#   617.0250 1    634.0294 1    634.8276 1    637.1154 1    640.9224 1
#   ..
#   18588.5661 1  18702.0666 1  18890.5291 1  19985.1776 1  20077.5177 1
#   20161.3343 1  20457.1239 1  20628.0956 1  20731.0408 1  20864.2010 1
#   20916.7939 1  21558.0296 1  21886.1411 1  22304.0038 1  22568.3367 1
#   22718.1978 1  22812.9377 1  23231.5175 1  23497.7818 1  23693.3226 1
#   23758.9147 1  24146.9122 1  24653.6224 1  24916.6825 1  25049.8728 1
#   25568.3298 1  25840.3059 1  26067.4314 1  26562.8055 1  33549.8227 1
#   Name: finalwgt, dtype: int64
print(df.finalwgt.value_counts().sort_index())

# Distribution of timesmar values.
# Recorded output (value, count):
#   1  5559
#   2  1077
#   3   174
#   4    26
#   5     5
#   Name: timesmar, dtype: int64
print(df.timesmar.value_counts().sort_index())