# Show how much each star-import grows the module namespace: the number of
# names reported by dir() is printed before and after every import.
# No helper variables are introduced here, since they would change the count.
print("Before Pandas %d" % len(dir()))
import pandas
from pandas import *
print("After Pandas %d" % len(dir()))
from numpy import *
print("After NumPy %d" % len(dir()))
# Reading Data
import urlparse
import httplib
import pandas
# Remote data sources (plain-HTTP URLs).
# CSV asteroid file; not referenced elsewhere in the visible part of this script.
csv_data = "http://priede.bf.lu.lv/ftp/grozs/Datorlietas/Geog5028/TIS_PRG/HomePlanet/ASTEROID.CSV"
# NASA/JPL numbered-asteroid elements file. The script reads a local copy
# (data/ELEMENTS.NUMBR.txt) as fixed-width text; this URL only appears in a
# commented-out save_file() call.
fwf_data = "http://ssd.jpl.nasa.gov/dat/ELEMENTS.NUMBR"
# Taxonomy table; handed directly to pandas.read_fwf as a URL.
tax_data = "http://sbn.psi.edu/ferret/reformatTable.action?productId=TAXONOMY10_TAB&dataSetId=EAR-A-5-DDR-TAXONOMY-V6.0"
def save_file(location):
    """Download the resource at ``location`` and store it under ``/tmp``.

    The local file is named after the last segment of the URL path and is
    created, or overwritten if it already exists.

    Args:
        location: An ``http://`` URL. Any query string is sent with the
            request but is not part of the local file name.

    Returns:
        The path of the file that was written.
    """
    url = urlparse.urlparse(location)

    # Request path *and* query: sending url.path alone silently drops
    # parameters such as "?productId=..." and fetches a different resource.
    # An empty path is sent as "/" so the request line stays well-formed.
    target = url.path or '/'
    if url.query:
        target += '?' + url.query

    connection = httplib.HTTPConnection(url.netloc)
    try:
        connection.connect()
        connection.request('GET', target)
        response = connection.getresponse()
        data = response.read()
    finally:
        # Release the socket even when the request or the read fails.
        connection.close()

    filename = '/tmp/' + url.path.split('/')[-1]
    print(filename)
    # 'wb' creates or truncates the file and writes the payload unmodified.
    # The previous mode string 'rw+' is not valid: Python 3 rejects it, and
    # Python 2 passes it to the C library, where it typically fails when the
    # file does not exist yet.
    with open(filename, 'wb') as tmp:
        tmp.write(data)
    return filename
# Load the NASA asteroid table from a local fixed-width text file.
# To download a fresh copy instead, enable the next line:
#filename = save_file(fwf_data)
filename = "data/ELEMENTS.NUMBR.txt"

# Column widths come from the file's second line: it is split on single
# spaces, and every piece contributes its length plus one.
with open(filename, 'r') as f:
    ruler = f.read().splitlines()[1]
widths = [len(segment) + 1 for segment in ruler.split(' ')]
asteroids_nasa = pandas.read_fwf(filename, widths=widths, skiprows=[1])

# The taxonomy table is read straight from its URL with hard-coded widths.
widths = [11, 18, 11, 13, 13, 14, 14, 14, 14, 13, 13, 12, 12, 10, 10, 15, 15, 16, 15, 22]
asteroids_taxonomy = pandas.read_fwf(tax_data, widths=widths, skiprows=[1])

# Print the row labelled 0 of each table, separated by blank lines.
print(asteroids_nasa.xs(0))
print('\n')
print(asteroids_taxonomy.xs(0))
# Descriptions
# Bare expressions below are kept from the original notebook-style flow;
# they produce output only when run interactively.

# First ten rows of the column slice 0:2.
leading = asteroids_nasa.ix[:, 0:2]
leading.head(10)

# Summary statistics: the whole frame, then a single column selected with
# an integer and with the label 'a'.
asteroids_nasa.describe()
by_number = asteroids_nasa.ix[:, 3]
by_number.describe()
by_label = asteroids_nasa.ix[:, 'a']
by_label.describe()

# Pairwise correlations over the column slice 3:7.
subset = asteroids_nasa.ix[:, 3:7]
subset.corr()

# rename() without inplace=True returns a renamed copy, which is discarded
# here, so the frame inspected right after is unchanged. The second call
# renames the columns of asteroids_nasa itself.
new_labels = {'Name': 'AST_NAME', 'a': 'Semimajor Axis', 'e': 'Eccentricity'}
asteroids_nasa.rename(columns=new_labels)
asteroids_nasa
asteroids_nasa.rename(columns=new_labels, inplace=True)
asteroids_nasa
asteroids_taxonomy

# Join the two tables on the asteroid-name column shared after the rename.
merged = asteroids_nasa.merge(asteroids_taxonomy, on='AST_NAME')
print(merged)

# Drop two columns (their labels contain a trailing newline).
for unwanted in ('Ref\n', 'COMMENT \n'):
    merged.pop(unwanted)
merged.columns

# Move DEMEO_REF_CODE: remove it, then re-insert it at position 11.
ref_codes = merged.pop('DEMEO_REF_CODE')
merged
merged.insert(11, 'DEMEO_REF_CODE', ref_codes)
merged
from pandas.stats.api import ols
# Least-squares fit of 'Semimajor Axis' (column 'a' before the in-place
# rename earlier in this script) against column 'i'.
response = asteroids_nasa.xs('Semimajor Axis', axis=1)
predictor = asteroids_nasa.xs('i', axis=1)
model = ols(y=response, x=predictor)
print(model)
import matplotlib.pyplot as plt