cd ~/projekte/openbsd/openbsd-src/ !git log --format=format:"%ai, %an" > ../commits cd .. ls !head commits import pandas as pd %time df=pd.read_csv("commits", header=None, names=["time", "author"]) df df.head() df.author.value_counts() %time df.index = pd.to_datetime(df['time']) df.tail() df.sort_index(inplace=True) df.tail() del df['time'] df["c"]=1 df['author'][2343] df['author'] = df['author'].map(lambda x: x.strip()) df.head() commits_per_person = df.author.value_counts() commits_per_person.describe() %pylab inline import matplotlib.pyplot as plt plt.figsize(10,6) commits_per_person.plot() top30=commits_per_person[:30] top30 top30.plot(kind="barh") commits_per_person['markus'] genuesen = ["markus", "bluhm", "mpf", "hshoexer", "grunk"] genu_commits = commits_per_person.ix[genuesen] genu_commits bluhm = df[df.author == "bluhm"] mpf = df[df.author == "mpf"] bluhm.c.cumsum().plot(style="r", label="bluhm") mpf.c.cumsum().plot(style="--", label="mpf") title("mpf vs. bluhm") legend(loc="best") markus = df[df.author == "markus"] hshoexer = df[df.author == "hshoexer"] grunk = df[df.author == "grunk"] bluhm.c.cumsum().plot(style="r.", label="bluhm", alpha=0.2) mpf.c.cumsum().plot(style="--.", label="mpf", alpha=0.2) hshoexer.c.cumsum().plot(style="g-..", label="hshoexer") grunk.c.cumsum().plot(style="m--.", label="grunk") markus.c.cumsum().plot(style='k:.', label="markus", alpha=0.2) legend(loc=0) savefig("genucommits.pdf") bluhm.head(1) mpf.head(1) markus.head(1) hshoexer.tail(1) grunk.tail(1) df.ix['2013'] df.c.cumsum().plot() cmon=df.resample("M", how="sum").c.cumsum() y=cmon.values y x=arange(cmon.size) x p=np.polyfit(x,y, 3) p x1=arange(300) plot(x,y) plot(x1,np.polyval(p,x1), "r") xlabel("months") np.polyval(p,x1).max() y.max() pd.Series(np.polyval(p,x1)).idxmax() pd.Series(x).idxmax() per_day=df.resample("D", how="sum") per_day.head() per_day.c.max() per_day.c.idxmax() df.ix["2001-06-25"] per_month=df.resample("M", how="sum") per_month.max() per_month.idxmax() 
# Monthly commit statistics, with a 15-month rolling mean drawn on top of the
# raw monthly counts to show the trend.
per_month.c.describe()
per_month.c.plot()
per_month.c.rolling(15).mean().plot()

# Copy the timestamp index into a regular column so date parts can be
# derived from it and used as grouping keys.
df["datetime"] = df.index
df.head()

# ISO weekday: Monday=1 ... Sunday=7 (.dt.weekday is Monday=0, hence +1).
df['weekday'] = df['datetime'].dt.weekday + 1
df.head()

# Commits per weekday.
wd = df[["c", "weekday"]]
per_weekday = wd.groupby("weekday")
per_weekday.sum()
per_weekday.sum().plot(kind="bar")

# Commits per hour of day.
df['hour'] = df['datetime'].dt.hour
per_hour = df.groupby('hour')['c'].sum()
per_hour.plot(kind="bar")