# Make `/` perform true division on Python 2 (no effect on Python 3).
from __future__ import division
from numpy.random import randn
import numpy as np
import os
import matplotlib.pyplot as plt
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(12345)
# Default matplotlib figure size: 10 wide by 6 high.
plt.rc('figure', figsize=(10, 6))
# The star import exposes pandas names (e.g. `read_csv`) unqualified.
from pandas import *
import pandas
# Show 4 digits of precision when NumPy prints floating point values.
np.set_printoptions(precision=4)
# IPython magic (not plain Python syntax): change the working directory,
# presumably so the relative CSV path used afterwards resolves.
%cd book_scripts/fec
/home/phillip/Documents/code/py/pandas-book/rev_539000/book_scripts/fec
# Read the contributions CSV from the current working directory into a
# DataFrame, then echo it so the notebook renders a preview.
fec = pandas.read_csv('P00000001-ALL.csv')
fec
| | cmte_id | cand_id | cand_nm | contbr_nm | contbr_city | ... | receipt_desc | memo_cd | memo_text | form_tp | file_num |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | C00410118 | P20002978 | Bachmann, Michelle | HARVEY, WILLIAM | MOBILE | ... | NaN | NaN | NaN | SA17A | 736166 |
1 | C00410118 | P20002978 | Bachmann, Michelle | HARVEY, WILLIAM | MOBILE | ... | NaN | NaN | NaN | SA17A | 736166 |
2 | C00410118 | P20002978 | Bachmann, Michelle | SMITH, LANIER | LANETT | ... | NaN | NaN | NaN | SA17A | 749073 |
3 | C00410118 | P20002978 | Bachmann, Michelle | BLEVINS, DARONDA | PIGGOTT | ... | NaN | NaN | NaN | SA17A | 749073 |
4 | C00410118 | P20002978 | Bachmann, Michelle | WARDENBURG, HAROLD | HOT SPRINGS NATION | ... | NaN | NaN | NaN | SA17A | 736166 |
5 | C00410118 | P20002978 | Bachmann, Michelle | BECKMAN, JAMES | SPRINGDALE | ... | NaN | NaN | NaN | SA17A | 736166 |
6 | C00410118 | P20002978 | Bachmann, Michelle | BLEVINS, DARONDA | PIGGOTT | ... | NaN | NaN | NaN | SA17A | 736166 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1001724 | C00500587 | P20003281 | Perry, Rick | HEFFERNAN, JILL PRINCE MRS. | INFO REQUESTED | ... | NaN | NaN | NaN | SA17A | 751678 |
1001725 | C00500587 | P20003281 | Perry, Rick | ELWOOD, MIKE MR. | INFO REQUESTED | ... | NaN | NaN | NaN | SA17A | 751678 |
1001726 | C00500587 | P20003281 | Perry, Rick | GORMAN, CHRIS D. MR. | INFO REQUESTED | ... | REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM... | NaN | REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM... | SA17A | 751678 |
1001727 | C00500587 | P20003281 | Perry, Rick | DUFFY, DAVID A. MR. | INFO REQUESTED | ... | NaN | NaN | NaN | SA17A | 751678 |
1001728 | C00500587 | P20003281 | Perry, Rick | GRANE, BRYAN F. MR. | INFO REQUESTED | ... | NaN | NaN | NaN | SA17A | 751678 |
1001729 | C00500587 | P20003281 | Perry, Rick | TOLBERT, DARYL MR. | INFO REQUESTED | ... | NaN | NaN | NaN | SA17A | 751678 |
1001730 | C00500587 | P20003281 | Perry, Rick | ANDERSON, MARILEE MRS. | INFO REQUESTED | ... | NaN | NaN | NaN | SA17A | 751678 |
1001731 rows × 16 columns
# Show a single record by its index label.  `.ix` was deprecated in
# pandas 0.20 and removed in 1.0; on the default integer index produced by
# read_csv it resolved by label, so `.loc` is the direct replacement.
fec.loc[123456]
cmte_id             C00431445
cand_id             P80003338
cand_nm         Obama, Barack
contbr_nm         ELLMAN, IRA
contbr_city             TEMPE
...
contb_receipt_dt    01-DEC-11
receipt_desc              NaN
memo_cd                   NaN
memo_text                 NaN
form_tp                 SA17A
file_num               772372
Name: 123456, Length: 16, dtype: object
# Collect the distinct values of the candidate-name column, echo them,
# and then look at the third entry on its own.
unique_cands = fec['cand_nm'].unique()
unique_cands
unique_cands[2]
'Obama, Barack'
# Party affiliation keyed by candidate name, spelled as in the cand_nm
# column.  Every name starts out as 'Republican'; the single Democrat is
# overridden afterwards, which leaves the key order unchanged.
parties = dict.fromkeys(
    [
        'Bachmann, Michelle',
        'Cain, Herman',
        'Gingrich, Newt',
        'Huntsman, Jon',
        'Johnson, Gary Earl',
        'McCotter, Thaddeus G',
        'Obama, Barack',
        'Paul, Ron',
        'Pawlenty, Timothy',
        'Perry, Rick',
        "Roemer, Charles E. 'Buddy' III",
        'Romney, Mitt',
        'Santorum, Rick',
    ],
    'Republican'
)
parties['Obama, Barack'] = 'Democrat'
# Peek at five consecutive candidate names, then at the same five names
# translated to a party through the `parties` lookup.
fec['cand_nm'][123456:123461]
fec['cand_nm'][123456:123461].map(parties)
# Store the party for every row as a new column and tally rows per party.
fec['party'] = fec['cand_nm'].map(parties)
fec['party'].value_counts()
Democrat      593746
Republican    407985
dtype: int64
# Count how many rows have a strictly positive contribution amount
# (True) versus zero or negative (False).
(fec['contb_receipt_amt'] > 0).value_counts()
True     991475
False     10256
dtype: int64
# Keep only rows whose contribution amount is strictly positive.
fec = fec[fec['contb_receipt_amt'] > 0]
# From the filtered data, take the rows for the two named candidates.
fec_mrbo = fec[fec['cand_nm'].isin(['Obama, Barack', 'Romney, Mitt'])]