%pylab inline

import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from StringIO import StringIO
from datetime import datetime

bugs_url = "http://bugs.mysql.com/search-csv.php?status=all&severity=all&mine=9242646"

bugs_req = requests.get(bugs_url)
csv_file = StringIO()
csv_file.write(bugs_req.text)
csv_file.seek(0)
bugsdf = pd.read_csv(csv_file)

bugsdf.head()

fig = plt.figure(figsize=(8,10), dpi=100)
bugsdf.Type.value_counts(ascending=True).plot(kind='barh')

def getversion(inputver):
    if isinstance(inputver, float):
        return inputver
    return inputver[:3]
bugsdf['major_version'] = bugsdf.Version.map(getversion)

compstat = bugsdf.groupby('major_version').Type.value_counts().unstack().T
fig = plt.figure(figsize=(15,10), dpi=100)
plt.pcolor(compstat, vmin=0, vmax=5, cmap='Blues')
plt.yticks(np.arange(0.5, len(compstat.index), 1), compstat.index)
plt.xticks(np.arange(0.5, len(compstat.columns), 1), compstat.columns)

fig = plt.figure(figsize=(8,10), dpi=100)
plt.pcolor(compstat, vmin=0, vmax=10, cmap='Blues')
plt.yticks(np.arange(0.5, len(compstat.index), 1), compstat.index)
plt.xticks(np.arange(0.5, len(compstat.columns), 1), compstat.columns)

bugsdf[bugsdf.major_version == '5.6'].Status.value_counts().plot(kind='bar')

bugsdf.groupby('major_version').Status.value_counts().unstack().plot(kind='barh', stacked=True)


bugsdf[bugsdf.Status != 'Closed'].groupby('major_version').Status.value_counts().unstack().plot(kind='bar', stacked=True)

bugsdf['Entered'] = pd.to_datetime(bugsdf.Entered)
bugsdf['Modified'] = pd.to_datetime(bugsdf.Modified)

bugsdf[-bugsdf.Status.isin(['Closed', 'Duplicate', 'Won\'t fix', 'Can\'t repeat'])].sort(columns='Entered')[:5]

bugsdf[-bugsdf.Status.isin(['Closed', 'Duplicate', 'Won\'t fix', 'Can\'t repeat'])] \
      [bugsdf.Modified < datetime(2013, 6, 1)] \
      [bugsdf.Modified > datetime(1970, 1, 1)]