# Enter the source checkout and dump one line per commit into a file named
# "commits" in the parent directory. The git format string emits
# "<author date (%ai)>, <author name (%an)>" for every commit.
cd ~/projekte/openbsd/openbsd-src/

!git log --format=format:"%ai, %an" > ../commits

# Move up to the directory that now contains the "commits" file, so the
# relative path used by the later read works.
cd ..

ls

# Peek at the first lines of the export.
!head commits

import pandas as pd

%time df=pd.read_csv("commits", header=None, names=["time", "author"])

df

df.head()

df.author.value_counts()

%time df.index = pd.to_datetime(df['time'])

df.tail()

df.sort_index(inplace=True)

df.tail()

del df['time']

df["c"]=1

df['author'][2343]

df['author'] = df['author'].map(lambda x: x.strip())

df.head()

commits_per_person = df.author.value_counts()

commits_per_person.describe()

%pylab inline

import matplotlib.pyplot as plt
plt.figsize(10,6)

commits_per_person.plot()

top30=commits_per_person[:30]
top30

top30.plot(kind="barh")

commits_per_person['markus']

# Commit counts for a hand-picked group of authors, selected by label.
# .loc replaces the removed .ix indexer for label-based selection.
genuesen = ["markus", "bluhm", "mpf", "hshoexer", "grunk"]
genu_commits = commits_per_person.loc[genuesen]
genu_commits

# Per-author subsets of the commit frame.
bluhm = df.loc[df["author"] == "bluhm"]

mpf = df.loc[df["author"] == "mpf"]

# Cumulative commit count over time for both authors on one set of axes.
bluhm["c"].cumsum().plot(style="r", label="bluhm")
mpf["c"].cumsum().plot(style="--", label="mpf")
title("mpf vs. bluhm")
legend(loc="best")

# Subsets for the remaining three authors of the group.
markus, hshoexer, grunk = (
    df.loc[df["author"] == who] for who in ("markus", "hshoexer", "grunk")
)

# Cumulative commit curves for all five authors, drawn in this order on the
# same axes; each entry carries the plot options for that author.
curves = [
    (bluhm, dict(style="r.", label="bluhm", alpha=0.2)),
    (mpf, dict(style="--.", label="mpf", alpha=0.2)),
    (hshoexer, dict(style="g-..", label="hshoexer")),
    (grunk, dict(style="m--.", label="grunk")),
    (markus, dict(style='k:.', label="markus", alpha=0.2)),
]
for commits, plot_kwargs in curves:
    commits["c"].cumsum().plot(**plot_kwargs)
legend(loc=0)
savefig("genucommits.pdf")

# The frame is sorted by time, so the first row of a subset is that
# author's earliest commit and the last row the most recent one.

# Earliest commit of bluhm, mpf and markus.
bluhm.iloc[:1]

mpf.iloc[:1]

markus.iloc[:1]

# Latest commit of hshoexer and grunk.
hshoexer.iloc[-1:]

grunk.iloc[-1:]

# All commits from 2013, selected by partial date string on the datetime
# index. .loc replaces the removed .ix indexer.
df.loc['2013']

# Cumulative number of commits over the whole history.
df["c"].cumsum().plot()

# Cumulative commit count at the end of every month. The how= argument of
# resample() no longer exists; the aggregation is a method call on the
# resampler. Only the numeric counter column is resampled so the author
# strings are never aggregated.
# NOTE: pandas >= 2.2 spells the month-end alias "ME" and warns on "M".
cmon = df["c"].resample("M").sum().cumsum()

y = cmon.values
y

# Month number (0, 1, 2, ...) as the x axis for the fit.
x = np.arange(cmon.size)
x

# Least-squares cubic polynomial through the cumulative curve.
p = np.polyfit(x, y, 3)
p

# Evaluate the fit over 300 months to extrapolate beyond the data.
x1 = np.arange(300)

plot(x, y)
plot(x1, np.polyval(p, x1), "r")
xlabel("months")

# Highest value the fitted curve reaches within the 300-month range ...
np.polyval(p, x1).max()

# ... compared with the highest observed cumulative count.
y.max()

# Month number at which the fitted curve peaks.
pd.Series(np.polyval(p, x1)).idxmax()

# Position of the largest month number, i.e. the last observed month.
pd.Series(x).idxmax()

# Commits per calendar day. resample(..., how="sum") is no longer accepted,
# so the aggregation is called on the resampler; selecting [["c"]] keeps the
# result a DataFrame with just the numeric counter column.
per_day = df[["c"]].resample("D").sum()

per_day.head()

# Largest number of commits on a single day ...
per_day.c.max()

# ... and the day on which it happened.
per_day.c.idxmax()

# All commits of one specific day, selected by date string; .loc replaces
# the removed .ix indexer.
df.loc["2001-06-25"]

# Commits per calendar month (resample(..., how="sum") is no longer
# accepted; only the numeric counter column is aggregated).
# NOTE: pandas >= 2.2 spells the month-end alias "ME" and warns on "M".
per_month = df[["c"]].resample("M").sum()
per_month.max()

# Month with the most commits.
per_month.idxmax()

per_month.c.describe()

per_month.c.plot()

# 15-month moving average of the monthly counts. The module-level
# pd.rolling_mean() was removed in favour of the .rolling() window API.
per_month.c.rolling(15).mean().plot()

# Copy the datetime index into a regular column so it can be used for
# derived per-commit features.
df["datetime"] = df.index
df.head()

# ISO weekday of every commit (Monday=1 ... Sunday=7). dayofweek counts
# from Monday=0, hence the +1; the vectorised accessor avoids calling a
# Python lambda once per row.
df['weekday'] = df['datetime'].dt.dayofweek + 1

df.head()

# Keep only the counter and the weekday (.ix no longer exists; plain column
# selection does the same here).
wd = df[["c", "weekday"]]

per_weekday = wd.groupby("weekday")

# Total commits per weekday, as a table and as a bar chart.
per_weekday.sum()

per_weekday.sum().plot(kind="bar")

# Hour of day (0-23) of every commit, taken from the "datetime" column with
# the vectorised accessor instead of a per-row Python lambda.
df['hour'] = df['datetime'].dt.hour

# Total commits per hour of day, shown as a bar chart.
per_hour = df.groupby('hour')['c'].sum()
per_hour.plot(kind="bar")