import pandas as pd
from pandas import Series
from pandas import DataFrame
import sys
import numpy as np
!cat ch06/ex1.csv
a,b,c,d,message 1,2,3,4,hello 5,6,7,8,world 9,10,11,12,foo
df = pd.read_csv('ch06/ex1.csv')
df
a | b | c | d | message | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |
pd.read_table('ch06/ex1.csv', sep = ',')
a | b | c | d | message | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |
!cat ch06/ex2.csv
1,2,3,4,hello 5,6,7,8,world 9,10,11,12,foo
pd.read_csv('ch06/ex2.csv', header = None)
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |
pd.read_csv('ch06/ex2.csv', names = ['a', 'b', 'c', 'd', 'message'])
a | b | c | d | message | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |
# ex2.csv has no header row, so the column labels are supplied here;
# the 'message' column is then used as the row index of the result.
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv(
    'ch06/ex2.csv',
    names=names,
    index_col='message',
)
a | b | c | d | |
---|---|---|---|---|
message | ||||
hello | 1 | 2 | 3 | 4 |
world | 5 | 6 | 7 | 8 |
foo | 9 | 10 | 11 | 12 |
!cat ch06/csv_mindex.csv
key1,key2,value1,value2 one,a,1,2 one,b,3,4 one,c,5,6 one,d,7,8 two,a,9,10 two,b,11,12 two,c,13,14 two,d,15,16
# Passing a list of columns to index_col builds a hierarchical
# (key1, key2) row index from them.
parsed = pd.read_csv(
    'ch06/csv_mindex.csv',
    index_col=['key1', 'key2'],
)
parsed
value1 | value2 | ||
---|---|---|---|
key1 | key2 | ||
one | a | 1 | 2 |
b | 3 | 4 | |
c | 5 | 6 | |
d | 7 | 8 | |
two | a | 9 | 10 |
b | 11 | 12 | |
c | 13 | 14 | |
d | 15 | 16 |
list(open('ch06/ex3.txt'))
[' A B C\n', 'aaa -0.264438 -1.026059 -0.619500\n', 'bbb 0.927272 0.302904 -0.032399\n', 'ccc -0.264273 -0.386314 -0.217601\n', 'ddd -0.871858 -0.348382 1.100491\n']
# ex3.txt separates its fields with a variable amount of whitespace, so the
# delimiter is a regular expression. It is written as a raw string because
# '\s' is not a valid escape sequence in an ordinary string literal.
result = pd.read_csv('ch06/ex3.txt', sep=r'\s+')
result
A | B | C | |
---|---|---|---|
aaa | -0.264438 | -1.026059 | -0.619500 |
bbb | 0.927272 | 0.302904 | -0.032399 |
ccc | -0.264273 | -0.386314 | -0.217601 |
ddd | -0.871858 | -0.348382 | 1.100491 |
!cat ch06/ex4.csv
# hey! a,b,c,d,message # just wanted to make things more difficult for you # who reads CSV files with computers, anyway? 1,2,3,4,hello 5,6,7,8,world 9,10,11,12,foo
# Lines 0, 2 and 3 of ex4.csv are '#' commentary; skipping them leaves the
# header line followed by the three data rows.
pd.read_csv(
    'ch06/ex4.csv',
    skiprows=[0, 2, 3],
)
a | b | c | d | message | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |
!cat ch06/ex5.csv
something,a,b,c,d,message one,1,2,3,4,NA two,5,6,,8,world three,9,10,11,12,foo
result = pd.read_csv('ch06/ex5.csv')
result
something | a | b | c | d | message | |
---|---|---|---|---|---|---|
0 | one | 1 | 2 | 3.0 | 4 | NaN |
1 | two | 5 | 6 | NaN | 8 | world |
2 | three | 9 | 10 | 11.0 | 12 | foo |
pd.isnull(result)
something | a | b | c | d | message | |
---|---|---|---|---|---|---|
0 | False | False | False | False | False | True |
1 | False | False | False | True | False | False |
2 | False | False | False | False | False | False |
result = pd.read_csv('ch06/ex5.csv', na_values=['NULL'])
result
something | a | b | c | d | message | |
---|---|---|---|---|---|---|
0 | one | 1 | 2 | 3.0 | 4 | NaN |
1 | two | 5 | 6 | NaN | 8 | world |
2 | three | 9 | 10 | 11.0 | 12 | foo |
# Per-column missing-value markers: each list of strings is treated as NA
# only within the column it is keyed by.
sentinels = {
    'message': ['foo', 'NA'],
    'something': ['two'],
}
result = pd.read_csv('ch06/ex5.csv', na_values=sentinels)
result
something | a | b | c | d | message | |
---|---|---|---|---|---|---|
0 | one | 1 | 2 | 3.0 | 4 | NaN |
1 | NaN | 5 | 6 | NaN | 8 | world |
2 | three | 9 | 10 | 11.0 | 12 | NaN |
result = pd.read_csv('ch06/ex6.csv')
result.head()
one | two | three | four | key | |
---|---|---|---|---|---|
0 | 0.467976 | -0.038649 | -0.295344 | -1.824726 | L |
1 | -0.358893 | 1.404453 | 0.704965 | -0.200638 | B |
2 | -0.501840 | 0.659254 | -0.421691 | -0.057688 | G |
3 | 0.204886 | 1.074134 | 1.388361 | -0.982404 | R |
4 | 0.354628 | -0.133116 | 0.283763 | -0.837063 | Q |
pd.read_csv('ch06/ex6.csv', nrows = 5)
one | two | three | four | key | |
---|---|---|---|---|---|
0 | 0.467976 | -0.038649 | -0.295344 | -1.824726 | L |
1 | -0.358893 | 1.404453 | 0.704965 | -0.200638 | B |
2 | -0.501840 | 0.659254 | -0.421691 | -0.057688 | G |
3 | 0.204886 | 1.074134 | 1.388361 | -0.982404 | R |
4 | 0.354628 | -0.133116 | 0.283763 | -0.837063 | Q |
# Read the file lazily, 1000 rows at a time, instead of loading it whole.
chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)
# Running tally of how often each value of the 'key' column occurs.
# The dtype is stated explicitly: an empty Series has no data to infer it
# from, and the default for that case is not the same in every pandas release.
tot = Series([], dtype='float64')
for piece in chunker:
    # fill_value=0 counts a key that is absent on one side as zero there.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
# Most frequent keys first; show the top ten.
tot = tot.sort_values(ascending=False)
tot[:10]
E 368.0 X 364.0 L 346.0 O 343.0 Q 340.0 M 338.0 J 337.0 F 335.0 K 334.0 H 330.0 dtype: float64
data = pd.read_csv('ch06/ex5.csv')
data
something | a | b | c | d | message | |
---|---|---|---|---|---|---|
0 | one | 1 | 2 | 3.0 | 4 | NaN |
1 | two | 5 | 6 | NaN | 8 | world |
2 | three | 9 | 10 | 11.0 | 12 | foo |
data.to_csv('ch06/out.csv')
!cat ch06/out.csv
,something,a,b,c,d,message 0,one,1,2,3.0,4, 1,two,5,6,,8,world 2,three,9,10,11.0,12,foo
data.to_csv(sys.stdout, sep = '|')
|something|a|b|c|d|message 0|one|1|2|3.0|4| 1|two|5|6||8|world 2|three|9|10|11.0|12|foo
data.to_csv(sys.stdout, na_rep='NULL')
,something,a,b,c,d,message 0,one,1,2,3.0,4,NULL 1,two,5,6,NULL,8,world 2,three,9,10,11.0,12,foo
data.to_csv(sys.stdout, header = False, index = False)
one,1,2,3.0,4, two,5,6,,8,world three,9,10,11.0,12,foo
data.to_csv(sys.stdout, index = False, cols = ['a', 'b', 'c'])
something,a,b,c,d,message one,1,2,3.0,4, two,5,6,,8,world three,9,10,11.0,12,foo
dates = pd.date_range('1/1/2000', periods=7)
dates
DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07'], dtype='datetime64[ns]', freq='D')
ts = Series(np.arange(7), index = dates)
ts
2000-01-01 0 2000-01-02 1 2000-01-03 2 2000-01-04 3 2000-01-05 4 2000-01-06 5 2000-01-07 6 Freq: D, dtype: int64
# header=False keeps the file free of a header row on every pandas version
# (newer releases write one by default for a Series), so it can be read back
# as plain date,value lines.
ts.to_csv('ch06/tseries.csv', header=False)
!cat ch06/tseries.csv
2000-01-01,0 2000-01-02,1 2000-01-03,2 2000-01-04,3 2000-01-05,4 2000-01-06,5 2000-01-07,6
# Series.from_csv is deprecated (and absent from current pandas); this is the
# equivalent read_csv form: no header row, first column as a parsed date
# index, and the single remaining column taken as the Series.
restored = pd.read_csv(
    'ch06/tseries.csv',
    header=None,
    index_col=0,
    parse_dates=True,
).iloc[:, 0]
# read_csv labels the header-less columns 0 and 1; drop those labels so the
# result is an unnamed Series with an unnamed index.
restored.index.name = restored.name = None
restored
2000-01-01 0 2000-01-02 1 2000-01-03 2 2000-01-04 3 2000-01-05 4 2000-01-06 5 2000-01-07 6 dtype: int64
!cat ch06/ex7.csv
"a","b","c" "1","2","3" "1","2","3","4"
import csv
# newline='' hands line-ending handling to the csv module, as its
# documentation asks for file objects passed to csv.reader.
# f is intentionally left open: later cells pass it to csv.reader again.
f = open('ch06/ex7.csv', newline='')
reader = csv.reader(f)
# Each iteration yields one row as a list of strings, quotes removed.
for line in reader:
    print(line)
['a', 'b', 'c'] ['1', '2', '3'] ['1', '2', '3', '4']
# Read every row into memory, closing the file as soon as that is done
# (the bare open() call previously left the handle to the garbage collector).
with open('ch06/ex7.csv', newline='') as csv_file:
    lines = list(csv.reader(csv_file))
lines
[['a', 'b', 'c'], ['1', '2', '3'], ['1', '2', '3', '4']]
lines[0]
['a', 'b', 'c']
lines[1:]
[['1', '2', '3'], ['1', '2', '3', '4']]
# First row holds the column names; everything after it is data.
header = lines[0]
values = lines[1:]
# Transpose the data rows into per-column tuples and key each by its header.
# zip stops at the shortest row, so a surplus field in a longer row is dropped.
data_dict = dict(zip(header, zip(*values)))
data_dict
{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}
class my_dialect(csv.Dialect):
    """CSV dialect for semicolon-delimited text with minimal quoting.

    Pass the class itself as ``dialect=`` to ``csv.reader`` or ``csv.writer``.
    Options not set here keep whatever ``csv.Dialect`` provides.
    """

    # Line ending used when writing rows.
    lineterminator = '\n'
    # Fields are separated by semicolons rather than commas.
    delimiter = ';'
    # Character that encloses fields containing special characters.
    quotechar = '"'
    # Quote a field only when it needs it.
    quoting = csv.QUOTE_MINIMAL
reader = csv.reader(f, dialect = my_dialect)
reader
<_csv.reader at 0x1083a42e8>
reader = csv.reader(f, delimiter = '|')
reader
<_csv.reader at 0x1083a4198>
obj = """
{"name": "Wes",
"places_lived": ["United States", "Spain", "Germany"],
"pet": null,
"siblings": [{"name": "Scott", "age": 25, "pet": "Zuko"},
{"name": "Katie", "age": 33, "pet": "Cisco"}]
} """
import json
result = json.loads(obj)
result
{'name': 'Wes', 'pet': None, 'places_lived': ['United States', 'Spain', 'Germany'], 'siblings': [{'age': 25, 'name': 'Scott', 'pet': 'Zuko'}, {'age': 33, 'name': 'Katie', 'pet': 'Cisco'}]}
asjson = json.dumps(result)
siblings = DataFrame(result['siblings'], columns = ['name', 'age'])
siblings
name | age | |
---|---|---|
0 | Scott | 25 |
1 | Katie | 33 |
from lxml.html import parse
from urllib.request import urlopen
# Fetch the page and parse it while the response is open, then close the
# connection instead of leaving the socket to the garbage collector.
with urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options') as response:
    parsed = parse(response)
doc = parsed.getroot()
links = doc.findall('.//a')
links[15: 20]
[<Element a at 0x10aa3d8b8>, <Element a at 0x10aa3d908>, <Element a at 0x10aa3d958>, <Element a at 0x10aa3d9a8>, <Element a at 0x10aa3d9f8>]
link = links[18]
link.get('href')
'https://www.yahoo.com/beauty'
link.text_content()
'Beauty'
urls = [link.get('href') for link in doc.findall('.//a')]
urls[0: 10]
['https://www.yahoo.com/', 'https://mail.yahoo.com/?.intl=us&.lang=en-US&.src=ym', 'https://search.yahoo.com/search', 'http://news.yahoo.com/', 'http://sports.yahoo.com/', 'http://finance.yahoo.com/', 'https://celebrity.yahoo.com/', 'https://weather.yahoo.com/', 'https://answers.yahoo.com/', 'https://www.flickr.com/']
tables = doc.findall('.//table')
calls = tables[1]
puts = tables[2]
calls
<Element table at 0x108093a98>
rows = calls.findall('.//tr')
def _unpack(row, kind='td'):
    """Return the text of every ``kind`` element found beneath ``row``.

    ``row`` is an element offering ``findall`` whose matches offer
    ``text_content()``. ``kind`` is the tag name to collect: 'td' (the
    default) for data cells, 'th' for header cells. The texts are returned
    as a list in the order ``findall`` yields them; the list is empty when
    nothing matches.
    """
    # str.format rather than '%' so the argument is always treated as a
    # single value.
    cells = row.findall('.//{}'.format(kind))
    return [cell.text_content() for cell in cells]
_unpack(rows[0], kind='th')
['\n \n Strike\n \n \ue004\n \ue002\n \n \n ∵ Filter\n ', 'Contract Name', '\n \n Last\n \n \ue004\n \ue002\n \n \n ', '\n \n Bid\n \n \ue004\n \ue002\n \n \n ', '\n \n Ask\n \n \ue004\n \ue002\n \n \n ', '\n \n Change\n \n \ue004\n \ue002\n \n \n ', '\n \n %Change\n \n \ue004\n \ue002\n \n \n ', '\n \n Volume\n \n \ue004\n \ue002\n \n \n ', '\n \n Open Interest\n \n \ue004\n \ue002\n \n \n ', '\n \n Implied Volatility\n \n \ue004\n \ue002\n \n \n ']
_unpack(rows[1], kind = 'td')
['\n \n \n ✕\n [modify]\n \n ']
frame = pd.read_csv('ch06/ex1.csv')
frame
a | b | c | d | message | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |
frame.to_pickle('ch06/frame_pickle')
pd.read_pickle('ch06/frame_pickle')
a | b | c | d | message | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |