This notebook generates the summary statistics for a package.
It assumes you are running it from the `tools`
directory at the top level of the package.
# Package to summarize; used below to build the path ../<package>/__init__.py
# and the GitHub project name "pysal/<package>".
package_name = "spaghetti"
This notebook will generate a file named `changelog.md` in the current directory.
You can edit it and prepend it to the `CHANGELOG.md`
file for the package release.
from __future__ import print_function
import os
import json
import re
import sys
import pandas
from datetime import datetime, timedelta
from time import sleep
from subprocess import check_output, Popen, PIPE
# Python 2 exposes urlopen directly on urllib; Python 3 moved it to
# urllib.request. Catch only ImportError so that unrelated failures
# (KeyboardInterrupt, SystemExit, ...) are not silently swallowed.
try:
    from urllib import urlopen
except ImportError:
    from urllib.request import urlopen
import ssl
import yaml
# SSL context created via ssl._create_unverified_context().
# NOTE(review): nothing in the visible code passes `context` to urlopen --
# confirm whether it is still needed.
context = ssl._create_unverified_context()
# Get the date of the last tag: run `git log` through a shell with the
# "%ai" pretty format and capture its stdout/stderr as bytes.
x, err = Popen(
'git log -1 --tags --simplify-by-decoration --pretty="%ai"| cat',
stdin=PIPE,
stdout=PIPE,
stderr=PIPE,
shell=True
).communicate()
# Keep only the first whitespace-separated token of the output; it is parsed
# below with "%Y-%m-%d".
# NOTE(review): raises IndexError when git prints nothing (e.g. a repository
# without tags); `err` is captured but never inspected.
start_date = x.split()[0].decode("utf-8")
# Today's date: the first token of str(datetime.today()).
release_date = str(datetime.today()).split()[0]
# Absolute path of the current working directory.
CWD = os.path.abspath(os.path.curdir)
# Bare expression: echoes the value when run as a notebook cell.
CWD
# `--since` argument appended to the `git log` commands further down.
since_date = '--since="{start}"'.format(start=start_date)
since_date
# Midnight of the last-tag date as a naive datetime; used as the cutoff for the
# GitHub queries further down.
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since
# Get __version__ from the package's __init__.py without importing the package.
f = "../{package}/__init__.py".format(package=package_name)
version_line = None
with open(f, 'r') as initfile:
    # Use the first line that assigns __version__ instead of blindly executing
    # line 1, so a leading docstring, comment or import does not break this.
    for line in initfile:
        if line.startswith("__version__"):
            version_line = line
            break
if version_line is None:
    # Fail here with a clear message rather than with a NameError much later.
    raise RuntimeError(
        "no __version__ assignment found in {path}".format(path=f)
    )
# NOTE(review): exec() runs a line of code taken from the package source; this
# is only acceptable because the file belongs to the local checkout being
# released. Never point this at untrusted input.
exec(version_line)
# Count the commits made since the last tag: `git log --oneline` prints one
# line per commit, so the number of output lines is the number of commits.
cmd = ["git", "log", "--oneline", since_date]
ncommits = len(check_output(cmd).splitlines())
# Bare expression: echoes the value when run as a notebook cell.
ncommits
Some of our contributors have many aliases for the same identity. So, we've added a mapping to make sure that individuals are listed once (and only once).
# Canonical contributor name -> every alias that contributor has committed under.
identities = {
    "Levi John Wolf": ("ljwolf", "Levi John Wolf"),
    "Serge Rey": ("Serge Rey", "Sergio Rey", "sjsrey", "serge"),
    "Wei Kang": ("Wei Kang", "weikang9009"),
    "Dani Arribas-Bel": ("Dani Arribas-Bel", "darribas"),
}


def regularize_identity(string):
    """Map one ``* <author>`` line of git output to a canonical author name.

    Parameters
    ----------
    string : bytes or str
        A single line such as ``b"* ljwolf"``. Bytes are decoded first.

    Returns
    -------
    str
        The author name without the leading ``"* "`` bullet, with every known
        alias from ``identities`` replaced by its canonical name. Names made
        of several words are title-cased; single-token names (typically
        account handles) are returned unchanged.
    """
    if isinstance(string, bytes):
        string = string.decode()
    # Strip the bullet *before* counting words. Counting while the "* " prefix
    # was still attached made every name look multi-word, so single-token
    # handles were always title-cased.
    string = string.lstrip("* ")
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(" ")) > 1:
        string = string.title()
    return string
# One "* <author name>" line per commit made since the last tag.
author_cmd = ["git", "log", "--format=* %aN", since_date]
from collections import Counter
ncommits = len(check_output(cmd).splitlines())
all_authors = check_output(author_cmd).splitlines()
# Collapse aliases to canonical names and tally the commits per contributor.
counter = Counter([regularize_identity(author) for author in all_authors])
# The contributor list is the set of canonical names. (An earlier assignment
# from the raw, un-regularized git output was overwritten immediately and has
# been dropped.)
unique_authors = counter.keys()
# Bare expression: echoes the value when run as a notebook cell.
unique_authors
from datetime import datetime, timedelta
# strptime/strftime pattern for the timestamps exchanged with the GitHub API
# (used by _parse_datetime and issues_closed_since below).
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
# Value sent as the `per_page` query parameter in the API URLs built below.
PER_PAGE = 100
# Patterns that pick the <url> targets and the rel="..." names out of a
# "link" header value.
element_pat = re.compile(r'<(.+?)>')
rel_pat = re.compile(r'rel=[\'"](\w+)[\'"]')


def parse_link_header(headers):
    """Return a ``{rel: url}`` dict built from the ``link`` entry of *headers*.

    *headers* only needs a ``get(key, default)`` method. The i-th ``rel`` name
    found is paired with the i-th ``<url>`` found; a missing or empty header
    yields an empty dict.
    """
    link_value = headers.get('link', '')
    targets = element_pat.findall(link_value)
    relations = rel_pat.findall(link_value)
    return dict(zip(relations, targets))
def get_paged_request(url):
    """Get a full list, handling APIv3's paging.

    Starting at *url*, fetch each page, extend the result list with its
    decoded JSON body, and follow the ``next`` relation of the response's
    ``link`` header until there is none.

    Returns the concatenated list of items from all pages.
    """
    results = []
    while url:
        response = urlopen(url)
        try:
            results.extend(json.load(response))
            links = parse_link_header(response.headers)
        finally:
            # Release the connection even if decoding the page fails; the
            # response used to be left open for every page fetched.
            response.close()
        url = links.get('next')
    return results
def get_issues(project="pysal/pysal", state="closed", pulls=False):
    """Fetch every issue (or pull request, when *pulls* is true) of *project*
    that is in the given *state* from the Github API, following pagination."""
    if pulls:
        endpoint = "pulls"
    else:
        endpoint = "issues"
    query = (project, endpoint, state, PER_PAGE)
    return get_paged_request("https://api.github.com/repos/%s/%s?state=%s&per_page=%i" % query)
def _parse_datetime(s):
"""Parse dates in the format returned by the Github API."""
if s:
return datetime.strptime(s, ISO8601)
else:
return datetime.fromtimestamp(0)
def issues2dict(issues):
    """Index *issues* by their ``"number"`` entry.

    Returns a dict mapping each issue number to the issue itself; when a
    number occurs more than once, the last occurrence wins.
    """
    return {entry["number"]: entry for entry in issues}
def is_pull_request(issue):
    """Return True if the given issue is a pull request.

    Recognizes both the legacy flat ``"pull_request_url"`` key and the nested
    ``"pull_request"`` object used by the GitHub REST v3 issues payload.
    """
    # Legacy payloads: a flat key marks the issue as a pull request.
    if "pull_request_url" in issue:
        return True
    # REST v3: the links live under "pull_request". Require a real html_url so
    # that a null/empty placeholder object is not mistaken for a pull request.
    pr_links = issue.get("pull_request") or {}
    return bool(pr_links.get("html_url"))
def issues_closed_since(period=timedelta(days=365), project="pysal/pysal", pulls=False):
    """Get all issues closed since a particular point in time.

    Parameters
    ----------
    period : datetime or timedelta
        The cutoff. A ``timedelta`` is interpreted as "this long before now".
    project : str
        GitHub project in ``owner/repo`` form.
    pulls : bool
        Query the pull-request endpoint instead of the issues endpoint; in
        that case only merged pull requests are kept.

    Returns
    -------
    list
        The API items whose ``closed_at`` is later than the cutoff.
    """
    which = "pulls" if pulls else "issues"
    if isinstance(period, timedelta):
        # The cutoff is sent with a literal "Z" (UTC) suffix and compared with
        # the API's UTC timestamps, so "now" must be taken in UTC as well;
        # local time skewed the window by the UTC offset.
        period = datetime.utcnow() - period
    url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i" % (project, which, period.strftime(ISO8601), PER_PAGE)
    allclosed = get_paged_request(url)
    filtered = [i for i in allclosed if _parse_datetime(i["closed_at"]) > period]
    # exclude rejected PRs (closed without being merged)
    if pulls:
        filtered = [pr for pr in filtered if pr['merged_at']]
    # NOTE(review): with pulls=False nothing here removes pull requests from
    # the result -- confirm whether the issues endpoint includes them and
    # whether they should be filtered out.
    return filtered
def sorted_by_field(issues, field="closed_at", reverse=False):
    """Return a new list of *issues* ordered by ``issue[field]``.

    The input is left untouched; *reverse* flips the order.
    """
    def field_value(issue):
        return issue[field]

    ordered = list(issues)
    ordered.sort(key=field_value, reverse=reverse)
    return ordered
def report(issues, show_urls=False):
    """Summary report about a list of issues, printing number and title.

    Prints one bullet line per issue. With *show_urls* the number is wrapped
    in a ``:ghpull:`` role when the item has a ``"merged_at"`` key and in a
    ``:ghissue:`` role otherwise.
    """
    for i in issues:
        title = i["title"]
        if sys.version_info[0] < 3:
            # Python 2 only: titles may contain non-ASCII text, so encode
            # before interpolating. On Python 3 encoding would turn the
            # output into the bytes repr (b'...'), so the str is used as is.
            title = title.encode("utf-8")
        if show_urls:
            role = "ghpull" if "merged_at" in i else "ghissue"
            print("* :%s:`%d`: %s" % (role, i["number"], title))
        else:
            print("* %d: %s" % (i["number"], title))
# NOTE(review): all_issues, all_pulls and total_commits are initialized here
# but never read in the visible code.
all_issues = {}
all_pulls = {}
total_commits = 0
# GitHub project in "owner/repo" form.
prj = "pysal/{package}".format(package=package_name)
# Issues closed, and pull requests merged, since the last-tag date.
issues = issues_closed_since(since, project=prj,pulls=False)
pulls = issues_closed_since(since, project=prj,pulls=True)
# Order both lists by "closed_at", newest first.
issues = sorted_by_field(issues, reverse=True)
pulls = sorted_by_field(pulls, reverse=True)
n_issues, n_pulls = map(len, (issues, pulls))
# The reported total is the sum of both lists.
n_total = n_issues + n_pulls
# One "<title> (#<number>)" entry per closed issue and per merged pull request.
issue_listing = [
    "{title} (#{number})".format(title=issue["title"], number=issue["number"])
    for issue in issues
]
pull_listing = [
    "{title} (#{number})".format(title=pull["title"], number=pull["number"])
    for pull in pulls
]
# Bare expression: echoes the value when run as a notebook cell.
pull_listing
# Opening sentence: totals, then the date of the previous release.
message = "We closed a total of {total} issues (enhancements and bug fixes) through {pr} pull requests".format(total=n_total, pr=n_pulls)
message = "{msg}, since our last release on {previous}.".format(msg=message, previous=str(start_date))
# Bare expressions such as this one echo the value when run as a notebook cell.
message
# "Issues Closed" section: one " - " bullet per entry of issue_listing.
message += "\n\n## Issues Closed\n"
message
# NOTE: from here on `issues` (and `pulls` below) hold the rendered bullet
# text, no longer the lists of API items.
issues = "\n".join([" - "+issue for issue in issue_listing])
message += issues
# "Pull Requests" section: one " - " bullet per entry of pull_listing.
message += "\n\n## Pull Requests\n"
pulls = "\n".join([" - "+pull for pull in pull_listing])
message += pulls
message
# Contributor list: one " - " bullet per name in unique_authors.
people = "\n".join([" - "+person for person in unique_authors])
people
message +="\n\nThe following individuals contributed to this release:\n\n{people}".format(people=people)
message
# Header with the version being released and today's date.
head = "# Changes\n\nVersion {version} ({release_date})\n\n".format(version=__version__, release_date=release_date)
print(head+message)
# The file name has no placeholders, so the former .format(version=...) call
# on it was a no-op and has been dropped.
outfile = "changelog.md"
# NOTE(review): the file is written with the platform default encoding; titles
# containing non-ASCII characters may need an explicit encoding -- confirm.
with open(outfile, "w") as of:
    of.write(head+message)