%time !git clone git://git.openssl.org/openssl.git

from IPython.display import IFrame

# Embed the project-history section of the Wikipedia article as an 800x400
# frame.  The URL uses https so the frame is not rejected as mixed content
# when the notebook itself is served over HTTPS.
IFrame("https://en.wikipedia.org/wiki/OpenSSL#History_of_the_OpenSSL_project", 800, 400)

# Enter the cloned working tree so the git commands below operate on it.
cd openssl/

# First 40 lines of the log in oldest-first order (the earliest commits).
!git log --reverse | head -40

# The most recent commit on the current branch.
!git log -1

# Number of commits reachable from HEAD (one log line per commit).
!git log --oneline | wc -l

# Summarised, human-readable disk usage of the current directory.
# NOTE(review): `-I` looks like the BSD/macOS `du` ignore-mask option; GNU du
# spells this `--exclude` -- confirm the platform this is meant to run on.
!du -hs -I\.git

# Source-line count for the tree; requires the external `sloccount` tool.
!sloccount .

# Write one line per commit to ../commits: author date, author name and full
# commit hash, joined by commas.
# NOTE(review): the fields are not quoted, so an author name containing a
# comma would yield an extra field when the file is read back as CSV.
!git log --format=format:"%ai,%an,%H" > ../commits

# Back to the parent directory, where the `commits` file was written.
cd ..

import pandas as pd

# Load the commit log: one "time,author,id" row per commit, no header line.
df = pd.read_csv(
    "commits",
    header=None,
    names=["time", "author", "id"],
    index_col="time",
)
# The timestamps include a UTC offset that may differ between commits.
# Converting everything to UTC yields a single, proper DatetimeIndex, which
# time-based operations such as resample() require.
df.index = pd.to_datetime(df.index, utc=True)
# Order commits chronologically.  DataFrame.sort() no longer exists in
# pandas; sort_index() is the index-ordering replacement.
df.sort_index(ascending=True, inplace=True)
df.head()

# How many commits each author name accounts for, largest count first.
commits_per_author = df["author"].value_counts()
commits_per_author

import seaborn as sns

%matplotlib inline

# One bar per author, bar height = that author's number of commits.
commits_per_author.plot(kind="bar", figsize=(10, 6))

# Give every commit a weight of 1; the running sum of that column is the
# total number of commits made up to each point in time.
df["c"] = 1
running_total = df["c"].cumsum()
commits_over_time = running_total.plot()
commits_over_time

# Build one column per author on the commit time index: the cell holds the
# commit's counter value where that author made the commit and NaN elsewhere.
authors = commits_per_author.index
timelines = pd.DataFrame(index=df.index)
for name in authors:
    made_by_author = df["author"] == name
    timelines[name] = df["c"].where(made_by_author)
timelines.head()

# Keep the currently active palette so it can be reinstated after plotting.
default_palette = sns.color_palette()

# Switch to the "Set1" palette, then plot the cumulative commit count of the
# ten authors with the most commits (one dot per commit).
sns.set_palette("Set1")
top_authors = authors[:10]
top_timelines = timelines[top_authors]
top_timelines.cumsum().plot(style="o", figsize=(20, 10))

# Put the saved palette back for the plots that follow.
sns.set_palette(default_palette)

# Commits per author summed over three-month bins.  The `how=` argument of
# resample() was removed from pandas; aggregation is now a method call on the
# resampler.  (pandas >= 2.2 prefers the alias "3ME" for month-end bins.)
per_months = timelines.resample("3M").sum()
# Active authors per bin = number of author columns with a positive commit
# count.  The comparison is vectorised and treats NaN as "no commits".
per_months["nauthors"] = (per_months > 0).sum(axis=1)
per_months["nauthors"].plot(kind="bar", figsize=(20, 5))

# Step back into the repository so the git commands below run against it.
cd openssl/

%%time 
# For every commit hash in df["id"], count the files tracked in that commit.
# One git process is started per commit, so the whole cell is timed.
filecounts = []
for commit in df["id"]:
    # `=!` captures the command's output lines; IPython substitutes $commit
    # with the loop variable.  `ls-tree -r --name-only` lists every file path
    # in the commit's tree, one per line.
    cfiles =! git ls-tree -r --name-only $commit
    filecounts.append(len(cfiles))

# Tracked-file count per commit, aligned with the commit time index of df,
# shown as a line plot.
filecount_series = pd.Series(filecounts, index=df.index, name="filecount")
filestats = filecount_series.to_frame()
filestats.plot(figsize=(10, 6))

# Capture the path of every file touched by every commit on all refs
# (--all), with rename (-M) and copy (-C) detection.  The empty format string
# suppresses the commit header lines and grep drops the blank separators, so
# each remaining output line is one changed path.
file_changes =! git log --all -M -C --name-only --format='format:' | grep -v '^$'
# One Series entry per (commit, changed file) occurrence.
dfc = pd.Series(list(file_changes))
# How often each path appears, i.e. how many times it was changed.
dfc.value_counts()

# Change counts restricted to C source files (paths ending in ".c").
is_c_source = dfc.str.endswith(".c")
c_changes = dfc[is_c_source].value_counts()
c_changes

# Distribution of change counts over the C files, most-changed first.
c_changes.plot()

# Same count for header files (paths ending in ".h").
is_header = dfc.str.endswith(".h")
h_changes = dfc[is_header].value_counts()
h_changes