from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame, Series
from sklearn import datasets, linear_model

try:
    # Current scikit-learn releases expose the splitter here.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for old scikit-learn releases that predate model_selection.
    from sklearn.cross_validation import train_test_split
# Apply seaborn's 'ticks' style and the 'Set2' color palette to the plots below.
sns.set(style='ticks', palette='Set2')
# IPython/Jupyter magic (not plain Python syntax): selects matplotlib's inline
# backend. This line only works when the file is run inside IPython/Jupyter.
%matplotlib inline

# Load the original UCI Auto MPG table. The file is read without a header row
# and with whitespace-separated fields, so the column names are supplied here.
data = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original",
    # A regex separator is used instead of delim_whitespace=True, which recent
    # pandas releases deprecate; both split fields on any run of whitespace.
    sep=r'\s+', header=None,
    names=['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration',
           'model', 'origin', 'car_name'])
# Report the table size before incomplete rows are removed.
print(data.shape)
# Drop every row that contains at least one missing value.
data = data.dropna()
data.head()

# Predictor columns and the single response column used by the regression.
indep_vars = ['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration']
dep_vars = ['mpg']
indep_data, dep_data = data[indep_vars], data[dep_vars]

# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
indep_train, indep_test, dep_train, dep_test = train_test_split(
    indep_data,
    dep_data,
    test_size=0.33,
    random_state=42
)

# Fit an ordinary least-squares model on the training split.
regr = linear_model.LinearRegression()
regr.fit(indep_train, dep_train)
# zip() is lazy on Python 3 and would print as "<zip object ...>", so it is
# materialized into a list of (column name, coefficient) pairs first.
print('Coefficients: {0}'.format(list(zip(indep_vars, np.squeeze(regr.coef_)))))

# Predict on the held-out rows and report the average squared error.
regr_predict = regr.predict(indep_test)
# Subtract plain arrays rather than a DataFrame so that np.mean always yields a
# single scalar for the %.2f format, independent of the installed pandas version.
squared_errors = (regr_predict - np.asarray(dep_test)) ** 2
# The value is a mean, not a sum, so it is labelled as the mean squared error.
print("Mean squared error: %.2f"
      % np.mean(squared_errors))

# Summary statistics of mpg for each cylinder count.
data.groupby(['cylinders']).mpg.describe()

# Mean mpg for each (cylinders, acceleration) combination: one row per cylinder
# count and one column per distinct acceleration value.
# The aggregation is named by string because recent pandas releases emit a
# FutureWarning when np.mean is passed as the aggregation callable.
pivot_table = data.pivot_table(index='cylinders', columns='acceleration', values='mpg', aggfunc='mean')
pivot_table.head()

# Histogram of fuel economy; p keeps the value returned by plt.hist.
mpg_values = data.mpg
p = plt.hist(mpg_values)
plt.title("MPG")
p
# Remove the top and right axes spines.
sns.despine()

# Regression plots of weight against mpg. Columns are passed by keyword because
# current seaborn releases take `data` as the first parameter and make x/y
# keyword-only, so the positional (x, y, data) form raises a TypeError there.

# Linear fit.
sns.lmplot(x="mpg", y="weight", data=data);

# Second-order polynomial fit.
sns.lmplot(x="mpg", y="weight", data=data, order=2);

# Joint plot of the same two columns, drawn with kind="reg".
sns.jointplot(x="mpg", y="weight", data=data, kind="reg")

# Distribution plots for displacement and horsepower. The DataFrame is passed
# explicitly as `data=` because the meaning of the first positional argument
# differs between seaborn releases (x in older ones, data in newer ones).
sns.boxplot(data=data[['displacement', 'horsepower']])

sns.violinplot(data=data[['displacement', 'horsepower']])

# One facet per distinct cylinder count, each showing a histogram of mpg.
g = sns.FacetGrid(
    data,
    col="cylinders",
)
g.map(plt.hist, "mpg");