import requests
import os
import urllib.parse as urlparse
import pandas as pd
import seaborn as sns
import html
import datetime
from IPython.display import HTML
# Stamp the output with the local date and time at which this cell ran.
print('\nLast run:', datetime.datetime.now().strftime('%Y-%m-%d %H:%M'))
Last run: 2016-06-17 02:40
%%html
<style>
/* Elements with class "scrollable" are capped at 800px tall and scroll
   their overflow. */
.scrollable {
max-height: 800px;
overflow: auto;
}
</style>
Longer text in columns please.
# Allow up to 250 characters per cell before pandas truncates the text.
pd.options.display.max_colwidth = 250
Read personal access tokens from the env.
# GitHub credentials are taken from the environment. The username falls back
# to 'parente' and the token to an empty string when the variables are unset.
username = os.getenv('NB_GITHUB_USERNAME', 'parente')
token = os.getenv('NB_GITHUB_TOKEN', '')
Nice to keep track of how many calls we've got left to the API.
# Overwritten by get_git() after each successful request; None until then.
api_remaining = None
Now two functions that will be used throughout.
def linkify(row):
    '''Turns the title entry into a link to the issue.

    Rows whose ``html_url`` is null are returned untouched. Otherwise the
    ``title`` entry is replaced, in place, by an ``<a>`` tag that points at
    ``html_url`` and opens in a new tab. Both values are HTML-escaped.

    Returns the same row object so the function can be used with
    ``DataFrame.apply(..., axis=1)``.
    '''
    if pd.isnull(row['html_url']):
        return row
    row['title'] = '<a href="{}" target="_blank">{}</a>'.format(
        # quote is a boolean flag: True also escapes " and ', which is
        # required because the URL is placed inside an attribute value.
        html.escape(str(row['html_url']), quote=True),
        # str() keeps non-string titles (numbers, NaN) from raising in escape.
        html.escape(str(row['title'])))
    return row
def get_git(url, **params):
    '''Convenience function for pulling from GitHub API endpoints,
    whether given a partial path or a full URL.

    Keyword arguments are sent as query-string parameters. Returns the
    ``requests`` response object. Raises ``requests.HTTPError`` for an error
    status. After a successful call the global ``api_remaining`` holds the
    value of the ``X-RateLimit-Remaining`` header (-1 if the header is absent).
    '''
    global api_remaining
    # Resolve bare paths such as '/search/issues' against the API root. A URL
    # that already carries its own scheme and host takes precedence over it.
    url = urlparse.urljoin('https://api.github.com', url)
    # Send credentials through the auth argument instead of splicing them into
    # the URL, so the token does not show up in error messages or resp.url.
    # With no token configured the request goes out unauthenticated.
    auth = (username, token) if token else None
    # requests appends params to any query string already present in the URL
    # rather than replacing it. The timeout keeps a stalled connection from
    # hanging the notebook indefinitely.
    resp = requests.get(url, params=params, auth=auth, timeout=30)
    resp.raise_for_status()
    api_remaining = int(resp.headers.get('X-RateLimit-Remaining', -1))
    return resp
Compute the assigned items but don't render them yet. We need this info to create some of the other tables of interest.
People on the team.
# Team member logins, kept in alphabetical order.
team = sorted([
    'parente', 'lull3rskat3r', 'jhpedemonte', 'dalogsdon', 'nitind',
    'jameslmartin', 'poplav', 'deedubbu', 'lbustelo', 'jtyberg', 'aluu317',
])
Functions to apply.
def assigned_to_user(username):
    '''Returns unfiltered DataFrame of issues assigned to username.

    Searches the open items in the jupyter and jupyter-incubator
    organisations. When the user has nothing assigned, a single placeholder
    row holding only ``assignee.login`` and ``number`` is returned, so the
    user still appears once the per-user frames are concatenated.
    '''
    resp = get_git('/search/issues',
                   q='is:open org:jupyter-incubator org:jupyter assignee:{}'.format(username),
                   # The search endpoint pages at 30 items by default; ask for
                   # the maximum page size so fewer results are cut off.
                   per_page=100)
    hits = resp.json()['items']
    if not hits:
        # dummy entry for users with nothing, 0 for number so we don't become floats
        return pd.DataFrame([{'assignee.login': username, 'number': 0}])
    # pandas 1.0 moved json_normalize to the top level and 2.0 dropped the
    # pd.io.json alias; fall back to the old location on older releases.
    normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize
    return normalize(hits)
def colorize(row, username_key, user_to_color):
    '''Sets the background color of a row based on the username.

    ``row[username_key]`` is lower-cased and looked up in ``user_to_color``.
    Returns one CSS declaration per entry of ``row``. A row whose username is
    missing (not a string) or has no entry in the mapping is left unstyled
    instead of raising, so one odd row cannot abort rendering of the table.
    '''
    username = row[username_key]
    unstyled = [''] * len(row)
    if not isinstance(username, str):
        return unstyled
    color = user_to_color.get(username.lower())
    if color is None:
        return unstyled
    return ['background-color: {}'.format(color)] * len(row)
GitHub API does not support more than one assignee field per call (supposedly), so iterate.
# One search per team member; the per-user frames are stacked into one frame.
raw_df = pd.concat(assigned_to_user(username) for username in team)
Filter down to just the columns we want.
# Columns kept for the assigned-issues table. The milestone title is only
# included when the raw frame actually has that column.
columns = ['assignee.login', 'number', 'title', 'html_url', 'updated_at']
columns += ['milestone.title'] if 'milestone.title' in raw_df.columns else []
# Renumber the rows 0..n-1 after selecting the columns.
assigned_df = raw_df.loc[:, columns].reset_index(drop=True)
Make the date/time a datetime.
# Convert updated_at to pandas datetimes; it is used as a sort key when the
# assigned-issues table is rendered.
assigned_df['updated_at'] = pd.to_datetime(assigned_df.updated_at)
Get the org and repo names out of the URL.
# The search query is not restricted to issues, so an item may be a pull
# request whose URL has the form .../pull/N. Accept both URL shapes so those
# rows get a repo too. expand=False makes extract return a Series for the
# single capture group regardless of pandas version.
assigned_df['repo'] = assigned_df.html_url.str.extract(
    r'github\.com/[^/]+/([^/]+)/(?:issues|pull)/', expand=False)
assigned_df['org.repo'] = assigned_df.html_url.str.extract(
    r'github\.com/([^/]+/[^/]+)/(?:issues|pull)/', expand=False)
# Repositories that are always reported on, plus every repository that has at
# least one assigned item.
repos = {
    'jupyter/kernel_gateway',
    'jupyter/kernel_gateway_demos',
    'jupyter/docker-stacks',
    'jupyter-incubator/contentmanagement',
    'jupyter-incubator/dashboards',
    'jupyter-incubator/dashboards_bundlers',
    'jupyter-incubator/dashboards_server',
    'jupyter-incubator/declarativewidgets',
    'jupyter-incubator/dashboards_setup',
    'jupyterhub/jupyterhub-deploy-docker',
} | set(assigned_df['org.repo'].dropna())
def milestones_for_repo(repo):
    '''Returns a DataFrame of the milestones of repo, or None if the
    endpoint returns an empty body.

    ``repo`` is an 'org/name' path such as 'jupyter/kernel_gateway'.
    '''
    # get_git already raises on an error status, so no second check is needed.
    body = get_git('/repos/{}/milestones'.format(repo)).json()
    if not body:
        # pd.concat drops None entries, so such repos vanish from the table.
        return None
    # pandas 1.0 moved json_normalize to the top level and 2.0 dropped the
    # pd.io.json alias; fall back to the old location on older releases.
    normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize
    return normalize(body)
def bar_complete(row, complete_color='#dbf0e0', incomplete_color='#dddddd'):
    '''Returns per-cell CSS that draws a completion bar in the second cell.

    The bar is a left-to-right gradient filled to the percentage of closed
    items, read from ``row['closed']`` and ``row['open']``. It uses
    ``complete_color`` when nothing is open and ``incomplete_color``
    otherwise. All other cells get an empty style.

    A row with no items at all, or with fewer than two cells, is left
    unstyled rather than dividing by zero or returning a wrong-length list.
    '''
    styles = [''] * len(row)
    total = row['open'] + row['closed']
    if not total or len(styles) < 2:
        return styles
    pct = row['closed'] / total * 100
    color = incomplete_color if row['open'] else complete_color
    styles[1] = 'background: linear-gradient(90deg, {} {}%, transparent 0%)'.format(color, pct)
    return styles
# Stack the per-repository milestone frames (repos without milestones yield
# None, which concat skips).
raw_df = pd.concat(milestones_for_repo(repo) for repo in repos)
milestones_df = raw_df[['title', 'closed_issues', 'open_issues', 'due_on', 'html_url']].reset_index(drop=True)
milestones_df['due_on'] = pd.to_datetime(milestones_df.due_on)
# Prefix each milestone title with its repository name. expand=False makes
# extract return a Series; with the DataFrame that the default now returns,
# the string concatenation below would not line up row by row. The optional
# "s" tolerates both .../milestones/... and .../milestone/... URLs.
milestones_df['title'] = milestones_df.html_url.str.extract(
    r'github\.com/[^/]+/([^/]+)/milestones?/', expand=False) + ' ' + milestones_df.title
# Show only milestones that have at least one issue, sorted by title, with
# the title linked and a completion bar drawn per row.
(milestones_df[milestones_df.closed_issues + milestones_df.open_issues > 0]
 .sort_values('title')
 .reset_index(drop=True)
 .apply(linkify, axis=1)
 .drop(['html_url'], axis=1)
 .rename(columns={'closed_issues': 'closed', 'open_issues': 'open', 'due_on': 'due'})
 .style
 .apply(bar_complete, axis=1)
 .set_caption('Milestones'))
title | closed | open | due | |
---|---|---|---|---|
0 | dashboards 0.6.0 | 9 | 0 | 2016-06-18 04:00:00 |
1 | dashboards_bundlers 0.8.0 | 4 | 0 | 2016-06-18 04:00:00 |
2 | dashboards_server 0.7.0 | 8 | 0 | 2016-06-18 04:00:00 |
3 | declarativewidgets 0.6.0 | 18 | 0 | 2016-06-17 05:00:00 |
4 | declarativewidgets 0.7.0 | 0 | 6 | 2016-07-01 05:00:00 |
5 | kernel_gateway 0.6.0 | 4 | 0 | NaT |
def stats_for_repo(repo):
    '''Returns a DataFrame built from the repository record of repo, or
    None if the endpoint returns an empty body.

    ``repo`` is an 'org/name' path such as 'jupyter/kernel_gateway'.
    '''
    # get_git already raises on an error status, so no second check is needed.
    body = get_git('/repos/{}'.format(repo)).json()
    if not body:
        # pd.concat drops None entries in the caller.
        return None
    # pandas 1.0 moved json_normalize to the top level and 2.0 dropped the
    # pd.io.json alias; fall back to the old location on older releases.
    normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize
    return normalize(body)
# Stack the per-repository frames into one table.
raw_df = pd.concat(stats_for_repo(repo) for repo in repos)
# Keep the columns of interest and give them short display names.
project_df = (
    raw_df[['name', 'open_issues_count', 'subscribers_count',
            'stargazers_count', 'forks_count', 'html_url']]
    .rename(columns={
        'name': 'title',
        'open_issues_count': 'issues',
        'subscribers_count': 'watchers',
        'stargazers_count': 'stars',
        'forks_count': 'forks',
    })
)
# Alphabetical by project, titles linked, numeric cells shaded in blue.
(project_df
 .sort_values('title')
 .reset_index(drop=True)
 .apply(linkify, axis=1)
 .drop(['html_url'], axis=1)
 .style
 .background_gradient(cmap="Blues", low=0, high=0.5)
 .set_caption('Projects'))
title | issues | watchers | stars | forks | |
---|---|---|---|---|---|
0 | contentmanagement | 5 | 17 | 33 | 9 |
1 | dashboards | 14 | 31 | 154 | 25 |
2 | dashboards_bundlers | 1 | 10 | 10 | 6 |
3 | dashboards_server | 16 | 13 | 29 | 11 |
4 | dashboards_setup | 1 | 11 | 4 | 4 |
5 | declarativewidgets | 29 | 18 | 56 | 18 |
6 | docker-stacks | 21 | 48 | 495 | 192 |
7 | jupyterhub-deploy-docker | 0 | 5 | 11 | 8 |
8 | kernel_gateway | 13 | 12 | 32 | 12 |
9 | kernel_gateway_demos | 1 | 9 | 9 | 10 |
Now sort, turn titles into links to issues, set background color by username, and render the assigned items table.
# One pastel colour per team member, paired with the sorted team list below.
assigned_cm = sns.hls_palette(len(team), l=0.9, s=0.4).as_hex()
# Sort on a lower-cased copy of the login so that mixed-case logins such as
# 'Lull3rSkat3r' fall in the same alphabetical order as the lower-case team
# list; within a user, the most recently updated items come first.
out = (assigned_df.assign(login_key=assigned_df['assignee.login'].str.lower())
       .sort_values(['login_key', 'updated_at'], ascending=[True, False])
       .apply(linkify, axis=1)
       .drop(['html_url', 'org.repo', 'updated_at', 'login_key'], axis=1)
       .style
       .apply(colorize, axis=1, user_to_color=dict(zip(team, assigned_cm)), username_key='assignee.login')
       .set_caption('Assigned Issues'))
# Styler.render() was removed in pandas 2.0 in favour of to_html(); use
# whichever the installed version provides.
out = out.to_html() if hasattr(out, 'to_html') else out.render()
HTML('<div class="scrollable">{}</div>'.format(out))
assignee.login | number | title | repo | |
---|---|---|---|---|
7 | Lull3rSkat3r | 159 | # ResponseInfo GET ignored for some Kernels | kernel_gateway |
0 | aluu317 | 0 | nan | nan |
1 | dalogsdon | 0 | nan | nan |
2 | deedubbu | 0 | nan | nan |
3 | jameslmartin | 203 | Example R / Scala notebooks | dashboards_server |
4 | jhpedemonte | 0 | nan | nan |
5 | jtyberg | 16 | Proof point: Show Jupyter Notebook launching kernels remotely | kernel_gateway |
6 | lbustelo | 250 | urth-core-function argument properties should declare types with polymer | declarativewidgets |
8 | nitind | 103 | Proof point: See if notebook-http mode can be made a "plugin" | kernel_gateway |
9 | parente | 0 | nan | nan |
10 | poplav | 0 | nan | nan |
def pull_requests_for_repo(repo):
    '''Returns a DataFrame of the pull requests listed for repo, or None if
    the endpoint returns an empty body.

    ``repo`` is an 'org/name' path such as 'jupyter/kernel_gateway'.
    '''
    # get_git already raises on an error status, so no second check is needed.
    # The listing pages at 30 items by default; ask for the maximum page size
    # so fewer pull requests are cut off.
    body = get_git('/repos/{}/pulls'.format(repo), per_page=100).json()
    if not body:
        # pd.concat drops None entries in the caller.
        return None
    # pandas 1.0 moved json_normalize to the top level and 2.0 dropped the
    # pd.io.json alias; fall back to the old location on older releases.
    normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize
    return normalize(body)
# Stack the per-repository pull request frames (None entries are skipped).
raw_df = pd.concat(pull_requests_for_repo(repo) for repo in repos)
prs_df = raw_df[['user.login', 'number', 'title', 'base.repo.name', 'updated_at', 'html_url']].reset_index(drop=True)
prs_df['updated_at'] = pd.to_datetime(prs_df.updated_at)
# Flag work-in-progress pull requests so they can be sorted to the bottom.
prs_df['wip'] = prs_df.title.str.contains('WIP')
# Lower-case first so that de-duplication and ordering ignore case; colorize
# looks colours up by lower-cased login.
contributors = prs_df['user.login'].str.lower().drop_duplicates().sort_values()
prs_cm = sns.hls_palette(contributors.size, l=0.9, s=0.4).as_hex()
# Non-WIP first, then by login (case-insensitive), newest update first.
out = (prs_df.assign(login_key=prs_df['user.login'].str.lower())
       .sort_values(['wip', 'login_key', 'updated_at'], ascending=[True, True, False])
       .apply(linkify, axis=1)
       .drop(['html_url', 'updated_at', 'wip', 'login_key'], axis=1)
       .style
       .apply(colorize, axis=1, user_to_color=dict(zip(contributors, prs_cm)), username_key='user.login')
       .set_caption('Open Pull Requests'))
# Styler.render() was removed in pandas 2.0 in favour of to_html(); use
# whichever the installed version provides.
out = out.to_html() if hasattr(out, 'to_html') else out.render()
HTML('<div class="scrollable">{}</div>'.format(out))
user.login | number | title | base.repo.name | |
---|---|---|---|---|
4 | mwaaas | 152 | added onbuild container for datascience | docker-stacks |
7 | nitind | 171 | [Issue: 103] Proof point: See if notebook-http mode can be made a "plugin" | kernel_gateway |
3 | peller | 206 | Scala system tests | declarativewidgets |
2 | poplav | 390 | Serialize dates and display column types | declarativewidgets |
1 | poplav | 398 | [Do NOT merge] - Testing R system tests | declarativewidgets |
0 | Lull3rSkat3r | 403 | [WIP] Standalone widgets | declarativewidgets |
6 | jhpedemonte | 196 | [WIP] [Issue 18] Remove Gridstack.destroy() workaround | dashboards |
5 | jtyberg | 21 | [WIP] Kernel Gateway client notebook extension | kernel_gateway_demos |
8 | poplav | 137 | [WIP] [Issue 131] - Implement kernel connection file reset if fails to start for the first time | kernel_gateway |