%%html
<script>
function code_toggle() {
if (code_shown){
$('div.input').hide('500');
$('#toggleButton').val('Show Code')
} else {
$('div.input').show('500');
$('#toggleButton').val('Hide Code')
}
code_shown = !code_shown
}
$( document ).ready(function(){
code_shown=false;
$('div.input').hide()
});
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>
Canada's Open Government Portal includes NSERC Awards Data from 1995 through 2016.
The awards data (in .csv format) were copied to an Amazon Web Services S3 bucket.
Acknowledgement: This notebook was constructed using JupyterHub service from syzygy.ca with assistance from Ian Allison and India Heisz. -- James Colliander
This process is loading NSERC Awards data starting with 1995 and moving to 2016...
## Thanks Ian Allison.
## Install some python libraries
import numpy as np
import pandas as pd
import sys
## Build a data frame with NSERC Awards Data by iteratively reading CSV files hosted on AWS S3.
## It would improve reproducibility if we imported from a definitive NSERC Awards Data.
df = pd.DataFrame()
startYear = 2017
endYear = 2018 # The last year is not included, so 2017 means we include the 2016 collection but not 2017.
## some columns dropped here for convenience
for year in range(startYear, endYear):
# file = 'https://s3.ca-central-1.amazonaws.com/open-data-ro/NSERC/NSERC_GRT_FYR' + str(year) + '_AWARD.csv.gz'
# https://pims-open-data.s3-us-west-2.amazonaws.com/NSERC_GRT_FYR2008_AWARD.csv
file = 'https://pims-open-data.s3-us-west-2.amazonaws.com/NSERC_GRT_FYR' + str(year) + '_AWARD.csv'
df = df.append(pd.read_csv(file,
usecols = [1, 2, 3, 4, 5, 7, 9, 11, 12, 13, 17, 28],
encoding='latin-1'
)
)
print(year)
## Rename columns for better readability.
df.columns = ['Name', 'Department', 'OrganizationID',
'Institution', 'ProvinceEN', 'CountryEN',
'FiscalYear', 'AwardAmount', 'ProgramID',
'ProgramNameEN', 'Committee', 'ResearchSubjectEN']
## Strip out any leading or trailing whitespace in the ProgramID column
df['ProgramID'] = df['ProgramID'].str.strip();
2017
## via India Heisz
## Define some methods
## Plotly is not installed by default
## TODO Remove plotly dependence or arrange for plotly installed as default on the hub
import matplotlib.ticker as mtick
# import plotly.graph_objs as go
# import plotly.offline as py
# from plotly.offline import init_notebook_mode, iplot
# import plotly.tools as tls
import matplotlib.pylab as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
def nsercPlot (data):
fig, axes = plt.subplots()
xAxis = 'FiscalYear'
yAxis = 'AwardAmount'
y = data.groupby(xAxis).sum()[yAxis]
x = y.index
plt.xlabel(xAxis, fontsize=14)
plt.ylabel(yAxis, fontsize=14)
plt.title(title)
plt.plot(x,y)
init_notebook_mode(connected=True)
axes.scatter(x,y,s=plotPointSizes,alpha=opacity)
canvas = FigureCanvas(fig)
plotly_fig = tls.mpl_to_plotly(fig)
py.iplot(plotly_fig)
return;
def viewAvailableSearch(column, searchString):
available = df.drop_duplicates(subset = column)
available = available[available[column].str.contains(searchString, na=False)]
sorted = available.sort_values(by=[column], ascending=[True])
print(sorted.to_string(columns= [column], index=False))
return;
def overview(column, data):
mean = data[columnYouWantInformationOn].mean()
print('The mean of ' + str(columnYouWantInformationOn) + ' is ' + str(mean))
median = data[columnYouWantInformationOn].median()
print('The median of ' + str(columnYouWantInformationOn) + ' is ' + str(median))
standardDeviation = data[columnYouWantInformationOn].std()
print('The standard deviation of ' + str(columnYouWantInformationOn) + ' is ' + str(standardDeviation))
awardCount = data.AwardAmount.count()
print('The total number of awards for your selection is ' + str(awardCount))
return;
An exploration of the table.
df
## Display the dataframe on the screen as a table.
Name | Department | OrganizationID | Institution | ProvinceEN | CountryEN | FiscalYear | AwardAmount | ProgramID | ProgramNameEN | Committee | ResearchSubjectEN | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Aagaard, Mark | Electrical & Computer Engineering | 33 | University of Waterloo | Ontario | CANADA | 2017 | 22000 | RGPIN | Discovery Grants Program - Individual | 1510 | Computer hardware |
1 | Aamodt, Tor | Electrical and Computer Engineering - Electric... | 2 | University of British Columbia | British Columbia | CANADA | 2017 | 65000 | RGPIN | Discovery Grants Program - Individual | 1510 | Distributed and parallel processing |
2 | Aamodt, Tor | Electrical and Computer Engineering | 2 | University of British Columbia | British Columbia | CANADA | 2017 | 40000 | RGPAS | Discovery Grants Program - Accelerator Supplem... | 1559 | Not available |
3 | Aamodt, Tor | Electrical and Computer Engineering | 2 | University of British Columbia | British Columbia | CANADA | 2017 | 245000 | STPGP | Strategic Projects - Group | 1588 | Computer hardware |
4 | Aamodt, Tor | Electrical and Computer Engineering - Electric... | 2 | University of British Columbia | British Columbia | CANADA | 2017 | 150000 | RTI | Research Tools and Instruments | 1610 | Computer architecture and design |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
25546 | Zwiazek, Janusz | Renewable Resources | 9 | University of Alberta | Alberta | CANADA | 2017 | 122000 | CRDPJ | Collaborative Research and Development Grants | 96 | Plant and treebiology |
25547 | Zwiazek, Janusz | Renewable Resources | 9 | University of Alberta | Alberta | CANADA | 2017 | 100000 | CRDPJ | Collaborative Research and Development Grants | 96 | Plant and treebiology |
25548 | Zwiazek, Janusz | Renewable Resources | 9 | University of Alberta | Alberta | CANADA | 2017 | 103600 | CRDPJ | Collaborative Research and Development Grants | 96 | Not available |
25549 | Zwimpfer, Charlotte | Botany | 2 | University of British Columbia | British Columbia | CANADA | 2017 | 4500 | URU | University Undergraduate Student Research Awards | 996 | Evolution and ecology |
25550 | Zytner, Richard | Engineering, School of | 24 | University of Guelph | Ontario | CANADA | 2017 | 20000 | RGPIN | Discovery Grants Program - Individual | 1509 | Bioremediation |
25551 rows × 12 columns
## Number of rows representing all award reports from NSERC during 1995-2016
len(df)
25551
df.columns
Index(['Name', 'Department', 'OrganizationID', 'Institution', 'ProvinceEN', 'CountryEN', 'FiscalYear', 'AwardAmount', 'ProgramID', 'ProgramNameEN', 'Committee', 'ResearchSubjectEN'], dtype='object')
Money flowed out of NSERC? We have a data set that reports NSERC disbursements with data to answer:
When? Who received it? Which university? Province? How much? Which NSERC program?
df['Department'].unique()
array(['Electrical & Computer Engineering', 'Electrical and Computer Engineering - Electrical and Computer Engineering', 'Electrical and Computer Engineering', ..., 'Ted Rogers School of Management - Ted Rogers School of Management', 'Ted Rogers School of Management', 'Dept. of Chemistry, Fac. of Science'], dtype=object)
## Number of unique departments
len(df['Department'].unique())
2096
df['ProvinceEN'].unique()
array(['Ontario', 'British Columbia', 'Québec', 'Newfoundland and Labrador', 'Manitoba', nan, 'Prince Edward Island', 'Nova Scotia', 'Pennsylvania', 'Alberta', 'Saskatchewan', 'Massachusetts', 'New Brunswick', 'Minnesota', 'New York', 'New Jersey', 'Washington', 'California', 'Colorado', 'North Carolina', 'Texas', 'Connecticut', 'Illinois', 'Rhode Island', 'District of Columbia', 'Oregon', 'Michigan', 'Georgia', 'Hawaii', 'Indiana', 'Missouri', 'New Mexico', 'Maryland', 'Wisconsin', 'Arizona', 'Nunavut', 'Florida', 'Northwest Territories', 'Nebraska', 'Iowa', 'Louisiana', 'Yukon', 'New Hampshire', 'Alaska', 'Tennessee'], dtype=object)
# How many unique ProgramID values?
len(df['ProgramID'].unique())
98
## Mismatch in program labels?
## TODO: Match ProgramNameEN to ProgramID; Find outliers
len(df['ProgramNameEN'].unique())
98
## Identify programs that have received the most funding
pd.pivot_table(df, index=['ProgramNameEN','FiscalYear'], aggfunc=sum,
values = ['AwardAmount']
).sort_values(by=['AwardAmount'], ascending=False)
AwardAmount | ||
---|---|---|
ProgramNameEN | FiscalYear | |
Discovery Grants Program - Individual | 2017 | 340998868 |
Canada Research Chairs | 2017 | 108311474 |
Collaborative Research and Development Grants | 2017 | 102355491 |
Canada First Research Excellence Fund | 2017 | 101820801 |
Strategic Projects - Group | 2017 | 45001328 |
... | ... | ... |
JSPS Researcher Exchange Program | 2017 | 10000 |
Aboriginal Ambassadors in the Natural Sciences and Engineering Supplement | 2017 | 8891 |
Connect Grants Level 2 for colleges Ontario | 2017 | 8000 |
NSERC Student Ambassadors | 2017 | 8000 |
Connect Grants Level 2 for colleges Quebec | 2017 | 6237 |
98 rows × 1 columns
## Identify programs that have received the most funding
pd.pivot_table(df, index=['FiscalYear', 'ProgramNameEN'], aggfunc=sum,
values = ['AwardAmount']
).sort_values(by=['AwardAmount'], ascending=False)
AwardAmount | ||
---|---|---|
FiscalYear | ProgramNameEN | |
2017 | Discovery Grants Program - Individual | 340998868 |
Canada Research Chairs | 108311474 | |
Collaborative Research and Development Grants | 102355491 | |
Canada First Research Excellence Fund | 101820801 | |
Strategic Projects - Group | 45001328 | |
... | ... | |
JSPS Researcher Exchange Program | 10000 | |
Aboriginal Ambassadors in the Natural Sciences and Engineering Supplement | 8891 | |
Connect Grants Level 2 for colleges Ontario | 8000 | |
NSERC Student Ambassadors | 8000 | |
Connect Grants Level 2 for colleges Quebec | 6237 |
98 rows × 1 columns
programYear = pd.pivot_table(df, index=['ProgramNameEN','FiscalYear'], aggfunc=sum,
values = ['AwardAmount']
).sort_values(by=['AwardAmount'], ascending=False)
## Export this table as a CSV file.
programYear.to_csv('program-year.csv')
## Historical Program recipents of most NSERC funds: 1995 -- 2016
df.groupby(['ProgramNameEN']).sum()['AwardAmount'].sort_values(ascending=False).head(n=50)
ProgramNameEN Discovery Grants Program - Individual 340998868 Canada Research Chairs 108311474 Collaborative Research and Development Grants 102355491 Canada First Research Excellence Fund 101820801 Strategic Projects - Group 45001328 Networks of Centres of Excellence 31315000 Industrial Research Chairs 29788775 Alexander Graham Bell Canada Graduate Scholarships - Doctoral 28355366 Engage Grants Program 28214847 Research Tools and Instruments 27616707 Collaborative Research and Training Experience 24100517 Postgraduate Scholarships - Doctoral 21283058 Subatomic Physics Envelope - Project 17673700 College and Community Innovation Program 15479624 University Undergraduate Student Research Awards 15425203 Postdoctoral Fellowships 15148612 Discovery Grants Program - Accelerator Supplements 14957501 Canada Excellence Research Chairs 14816667 Alexander Graham Bell Canada Graduate Scholarships - Master's 13722918 Strategic Network Grants Program 11923365 Centres of Excellence for Commercialization and Research - Group 11752676 Applied Research and Development Grants - Level 2 10858337 Collaborative Health Research Projects 10049634 PromoScience 9834488 Applied Research and Development Grants - Level 1 9061665 Experience Awards (previously Industrial Undergraduate Student Research Awards) 8335634 Vanier Canada Graduate Scholarship Tri-Council - Doctoral 3 years 8287499 Technology Access Centre 7249999 Major Resources Support Program - Infrastructure 6500000 Idea to Innovation 6152182 Business-led Networks of Centres of Excellence - Group 6096500 Automotive Partnership Canada Project 4837606 Thematic Resources Support in Mathematics and Statistics 4337726 Subatomic Physics Envelope - Individual 4245220 Industrial Research Chairs for Colleges Grants 3799810 Cooperative Activities 3664866 Banting Postdoctoral Fellowships Tri-council 3138333 Climate Change and Atmospheric Research 2958731 Subatomic Physics Envelope - Major Resources Support Program 2380000 Discovery Grants Program - Northern Research Supplement 2108850 DND/NSERC Discovery Grant Supplement 2000000 Chairs in Design Engineering - Research 1807500 Department of National Defence / NSERC Research Partnership 1521245 EWR Steacie Fellowships - Supplement 1500000 Applied Research and Development Grants - Level 3 1473259 Engage Plus Grants Program 1417974 Canada 150 Research Chairs 1359118 Applied Research Tools and Instruments Grants 1284997 Discovery Grants Program - Group 1100800 Canadian Institute for Theoretical Astrophysics Support Program 1100000 Name: AwardAmount, dtype: int64
## Historical PI recipents of most NSERC funds: 1995 -- 2016
df.groupby(['Name']).sum()['AwardAmount'].sort_values(ascending=False).head(n=70)
Name Zink, Laura 9868590 Forrest, Tracey 9207594 Babiuk, Lorne 9118877 Wallace, Douglas 9090372 Hayter, Nancy 9079166 ... Bourbonnais, Normand 1229120 Sargent, Edward 1226589 Soucy, Josée 1217966 Blowes, David 1197444 Colliander, James 1195450 Name: AwardAmount, Length: 70, dtype: int64
ax = (df.groupby(['FiscalYear']).sum()['AwardAmount'].plot(kind="bar", title="Total Expenditures vs. Time"))
ax.set_ylabel("Reported Dollars")
Text(0, 0.5, 'Reported Dollars')
## Total Annual Budget
df.groupby(['FiscalYear']).sum()['AwardAmount']
FiscalYear 1995 451159747 1996 434116337 1997 417146970 1998 477141539 1999 526133116 2000 537934451 2001 554698568 2002 615320903 2003 696997644 2004 765643760 2005 820640167 2006 854567999 2007 968713281 2008 980889930 2009 1003483757 2010 1026366682 2011 1035205511 2012 1021894152 2013 1018139165 2014 1038149414 2015 1068044894 2016 1142066427 Name: AwardAmount, dtype: int64
## The ~ operator negates the condition.
ax = (df.loc[~(df['ProgramNameEN'].str.contains('iscovery'))].groupby(['FiscalYear']).sum()['AwardAmount'].plot(kind="bar", title="Not containing 'iscovery' vs. Time"))
ax.set_ylabel("Reported Dollars")
Text(0, 0.5, 'Reported Dollars')
df.loc[~(df['ProgramNameEN'].str.contains('iscovery'))].groupby(['FiscalYear']).sum()['AwardAmount']
FiscalYear 1995 267113329 1996 247533171 1997 227810152 1998 266731883 1999 299956302 2000 294508093 2001 316537927 2002 357333281 2003 423803197 2004 488699482 2005 519567082 2006 545233090 2007 655321429 2008 664642572 2009 680426976 2010 702101715 2011 705872398 2012 685991741 2013 681486392 2014 698064881 2015 727279990 2016 791496311 Name: AwardAmount, dtype: int64
ax = (df.loc[(df['ProgramNameEN'].str.contains('iscovery'))].groupby(['FiscalYear']).sum()['AwardAmount']
.plot(kind="bar", title="Containing 'iscovery' vs. Time"))
ax.set_ylabel("Reported Dollars")
Text(0, 0.5, 'Reported Dollars')
df.loc[(df['ProgramNameEN'].str.contains('iscovery'))].groupby(['FiscalYear']).sum()['AwardAmount']
FiscalYear 1995 184046418 1996 186583166 1997 189336818 1998 210409656 1999 226176814 2000 243426358 2001 238160641 2002 257987622 2003 273194447 2004 276944278 2005 301073085 2006 309334909 2007 313391852 2008 316247358 2009 323056781 2010 324264967 2011 329333113 2012 335902411 2013 336652773 2014 340084533 2015 340764904 2016 350570116 Name: AwardAmount, dtype: int64
disc = df.loc[(df['ProgramNameEN'].str.contains('iscovery'))]
disc['ProgramNameEN'].unique()
array(['Discovery Grants Program - Individual', 'Discovery Grants Program - Group', 'Discovery Grants Program - Project', 'Discovery Grants Program - Institutes and Initiatives', 'Discovery Grants Program - Multidisciplinary Network Groups', 'Discovery Grants Program - Ship Time', 'Discovery Grants Program - Accelerator Grant', 'Discovery Grants Program - Leadership Support', 'Discovery Grants Program - Northern Research Supplement', 'Discovery Grants Program - Accelerator Supplements', 'Discovery Frontiers - Northern Earth System Research', 'Discovery Frontiers - Advancing Big Data Science in Genomics Research ', 'Discovery Frontiers - Digging into Data ', 'Discovery Development Grant', 'Discovery Frontiers - New Materials for Clean Energy and Energy Efficiency'], dtype=object)
## Distribution of Discovery Funding Across Programs
(disc.groupby(['ProgramNameEN','FiscalYear']).sum()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=False, cmap="viridis", alpha=0.7, figsize=(16, 8), width=1,
title="Total dollars invested by Evaluation Group Committee")
<matplotlib.axes._subplots.AxesSubplot at 0x11d73f550>
The bulk of the historical expenses in Discovyer took place in "Discovery Grants Program - Individual"
notdisc = df.loc[~(df['ProgramNameEN'].str.contains('iscovery'))]
notdisc['ProgramNameEN'].unique()
array(['Postgraduate Scholarships', 'Industrial Undergraduate Student Research Awards', 'Postdoctoral Fellowships', 'Industrial Postgraduate Scholarships', '1967 Science and Engineering Scholarships', 'University Undergraduate Student Research Awards', 'Collaborative Research and Development Grants', 'Canada International Fellowships (H)', 'Infrastructure Grants (H)', 'Strategic Projects - Group', 'Research Tools and Instruments - Category 1 (<$150,000)', "Women's Faculty Awards - Salary (H)", "Women's Faculty Awards (H)", 'Industrially Oriented Research Grants', 'International: North American Research Fellowships', 'International : Workshops', 'Networks of Centres of Excellence', 'Industrial Research Chairs', 'Miscellaneous Grants', 'Conference Grants (H)', 'Strategic Grants - Individual (H)', 'Workshops and Seminars (H)', 'Collaborative Project Grants (H)', 'Industrial Research Fellowships', 'Cooperative Activities', 'University-Industry Affiliations (H)', 'Strategic Grants - Equipment (H)', 'Subatomic Physics Envelope - Project', 'Bilateral Exchange Program (H)', 'Research Reorientation Associateships (H)', 'Subatomic Physics Envelope - Individual', 'Subatomic Physics Envelope - Major Facilities Access', 'Subatomic Physics Envelope - Group', 'Collaborative Research and Development Grants - Government (H)', 'University Research Fellowships (H)', 'Attachés de recherche (H)', 'Miscellaneous', 'Subatomic Physics Envelope - Research Tools and Instruments', 'Research Tools and Instruments - Category 2 ($150,000 - $325,000)', 'Special Postgraduate Scholarships for Employed Scientists and Engineers (H)', 'EWR Steacie Fellowships - Supplement', 'New Faculty Support Grants', 'Collaborative Special Project Grants', 'Senior Industrial Fellowships (H)', 'Other Government Chairs (H)', 'Research Tools and Instruments - Category 3 (>$325,000)', 'Research Networks', "NSERC/SSHRC Master's Scholarships in Science Policy (H)", 'Canadian Microelectronics Corporation', 'Intellectual Property Mobilization - Infrastructure', 'Postgraduate Scholarships in Science Librarianship and Documentation (H)', 'Chairs for Women in Science and Engineering - Project', 'Foreign Researcher Awards (H)', 'Request for Applications Program (H)', 'Subatomic Physics Envelope - Conference (H)', 'CARNET', 'Shared Equipment and Facilities Grants (H)', 'Major Facilities Access Grants', 'Department of National Defence / NSERC Research Partnership', 'National Research Council / NSERC Research Partnership', 'Technology Partnerships Program', 'Agriculture and Agri-Food Canada Research Partnership', 'Chairs in the Management of Technological Change', 'Canadian Forest Service Research Partnership', 'Canadian Space Agency Research Partnership', 'Micronet Research Partnership', 'Networks of Centres of Excellence - Letters of Intent', 'Doctoral Prizes', 'Canadian Wildlife Service Research Partnership', 'International Opportunity Fund', 'Parental Leave - Research Grants', 'University Faculty Award', 'Genomics Projects', 'Tri-Council Workshop/Networking Program', 'Collaborative Research Opportunities Grants', 'Collaborative Health Research Projects', 'Miscellaneous - Scholarships and Fellowships - Project', 'Chairs in Design Engineering - Research', 'PromoScience', 'Canada Research Chairs', 'Gerhard Herzberg Canada Gold Medal for Science and Engineering', 'Presidential Fund', 'NRCan - Earth Sciences Sector/NSERC Research Partnership - Project', 'Fuel Cell Technology Program', 'Michael Smith Award for Science Promotion - Organization', 'Students Promoting Awareness of Research Knowledge', 'Fuel Cell Postgraduate Stipends', 'Michael Smith Award for Science Promotion - Individual', 'Fuel Cell Technology Program - Chair Project', 'Northern Research Chairs Program - Grant', 'Perimeter Institute', 'Synergy Awards', 'NSERC/Energy Sector, Natural Resources Canada', 'NanoIP - Project', 'NSERC - EMPOWR Microelectronics HQP Training', 'Research Management Funds - Group', 'Alexander Graham Bell Canada Graduate Scholarships - Doctoral', "Alexander Graham Bell Canada Graduate Scholarships - Master's", 'Special Research Opportunity Program - Pre-research', 'Idea to Innovation', 'Summer Program in Japan', 'Strategic Projects BIOCAP', 'New Media Initiatives', "Postgraduate Scholarships - Master's", 'Postgraduate Scholarships - Doctoral', 'Summer Program in Taiwan', 'Northern Research Internships', 'Research Capacity Development in Small Universities', 'College and Community Innovation Pilot Program', 'Special Research Opportunity Program - Project', 'Joint Infrastructure Interdependencies Research Program', 'Special Research Opportunity Program - Northern Research', 'Innovation Challenge Award', 'Northern Research Chair Program', 'Special Research Opportunity Program - Inter-American Collaboration in Materials Research', 'Centres for Research in Youth, Science Teaching and Learning', 'Brockhouse Canada Prize for Interdisciplinary Research in Science and Engineering', 'Regional Office Discretionary Funds - Atlantic', 'Regional Office Discretionary Funds - Prairie', 'Regional Office Discretionary Funds - British Columbia', 'Quantum Works Innovation Platform', 'John C. Polanyi Award', 'Strategic Projects Supplemental Competition', 'NRC-NSERC-BDC Nanotechnology Initiative', "Government of Canada's Program for International Polar Year", 'Subatomic Physics Envelope - Major Resources Support Program', 'Major Resources Support Program - Infrastructure', "Industrial Scholarship in Partnership with the FQRNT - Master's", 'Strategic Workshops Program', 'Miscellaneous Grants - Dissemination', 'Centres of Excellence for Commercialization and Research - Group', 'Industrial Scholarship in Partnership with the FQRNT- Doctoral', 'College and Community Innovation Program', 'NSERC/NRCan/AECL Generation IV Energy Technologies Program', 'Canadian Graduate Scholarships Foreign Study Supplements', 'Regional Opportunities Fund - Quebec', 'Business-led Networks of Centres of Excellence - Group', 'Industrial R&D Internships', 'Collaborative Research and Training Experience ', 'Vanier Canada Graduate Scholarships - Doctoral ', 'Regional Opportunities Fund - Ontario ', 'Interaction Grants Program ', 'Engage Grants Program ', 'Alternative Radiopharmaceuticals for Medical Imaging ', 'Aboriginal Ambassadors in the Natural Sciences and Engineering Supplement ', 'Special Research Opportunities - Canada-Israel Program ', 'NSERC Awards for Science Promotion - Individual ', 'Strategic Research Networks Program Letters of Intent ', 'JSPS Researcher Exchange Program ', 'Automotive Partnership Canada Project ', 'Industrial R&D Fellowships (IRDF)', 'Applied Research Tools and Instruments Grants', 'Regional Office Discretionary Funds', 'Canada Excellence Research Chairs', 'College and Community Innovation Program - Entry Level', 'G8 Initiative Research Councils on Multilateral Research Funding', 'Applied Research and Development Grants - Level 1', 'Banting Postdoctoral Fellowships', 'Technology Access Centre', 'Industrial Research Chairs for Colleges Grants', 'Parental Leave - Scholarships & Fellowships', 'Technology Access Centre - Letters of Intent', 'Applied Research and Development Grants - Level 2', 'College - University Idea to Innovation Grants', 'Climate Change and Atmospheric Research', 'Industrial Undergraduate Student Research Awards Program', 'Applied Research and Development Grants - Level 3', 'Unique Initiatives Fund', 'Partnership Workshops Grants - Colleges', 'Business-led Networks of Centres of Excellence - Letters of Intent ', 'Fellowship Innovation Platform ', 'Banting Postdoctoral Fellowships Tri-council ', 'Vanier Canada Graduate Scholarship Tri-Council - Doctoral 3 years ', 'Undergraduate Student Research Awards Program - Colleges ', 'Engage Plus Grants Program ', 'EWR Steacie Fellowships - Salary ', 'College Special Initiatives ', 'Partnerships Innovation Platform ', 'Thematic Resources Support in Mathematics and Statistics ', 'Belmont Forum Arctic Observing and Research ', 'College Synergy Awards ', 'Research Tools and Instruments', 'Canada First Research Excellence Fund', 'Engage Plus Grants for colleges', 'Experience Awards (previously Industrial Undergraduate Student Research Awards)', 'Connect Grants Level 1', 'Connect Grants Level 2', 'Strategic Network Grants Program', 'Connect Grants Level 3', 'WaterWorks Joint Call', 'Canadian Institute for Theoretical Astrophysics Support Program', 'Engage Plus Grants for Colleges'], dtype=object)
## Distribution of Discovery Funding Across Programs
(notdisc.groupby(['ProgramNameEN','FiscalYear']).sum()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=True, cmap="viridis", alpha=0.7, figsize=(16, 8), width=1,
title="Reported Dollars invested across programs outside of Discovery")
<matplotlib.axes._subplots.AxesSubplot at 0x120a21b70>
committee = 1504
ax = df.loc[df['Committee'] == committee].groupby(['FiscalYear']).sum()['AwardAmount'].plot(
kind="bar", title="committee vs. Time")
ax.set_ylabel("Reported Dollars")
Text(0, 0.5, 'Reported Dollars')
EvaluationGroups = df.loc[(df['Committee']==1501)
| (df['Committee']==1502)
| (df['Committee']==1503)
| (df['Committee']==1504)
| (df['Committee']==1505)
| (df['Committee']==1506)
| (df['Committee']==1507)
| (df['Committee']==1508)
| (df['Committee']==1509)
| (df['Committee']==1510)
| (df['Committee']==1511)
| (df['Committee']==1512)]
## 2009 is an outlier so drop it. 2010 is also a bit weird.
EG = EvaluationGroups.loc[~(EvaluationGroups['FiscalYear']==2009)]
EG['FiscalYear'].unique()
array([2010, 2011, 2012, 2013, 2014, 2015, 2016])
## Total investment dollars vs Fiscal Year separated by Evaluation Group Committee
(EG.groupby(['Committee','FiscalYear']).sum()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=False, cmap="viridis", alpha=0.7, figsize=(16, 8), width=1,
title="Total dollars invested by Evaluation Group Committee")
<matplotlib.axes._subplots.AxesSubplot at 0x12bb8bda0>
Discipline Dynamics? The number and age of PIs served by each committee and each fiscal year changes.
## Number of Awards vs Fiscal Year separated by Evaluation Group Committee
(EG.groupby(['Committee','FiscalYear']).count()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=False, cmap="viridis", alpha=0.7, figsize=(16, 8), width=1,
title="Number of Awards by Evaluation Group Committee")
<matplotlib.axes._subplots.AxesSubplot at 0x11e5b0080>
Calculuate an average? Divide the total dollars spent on a committee during a fiscal year by the number of awards given in that committee during that fiscal year.
## Total Dollars invested divided by number of awards vs. Fiscal Year separated by Evaluation Group Committee
((EG.groupby(['Committee','FiscalYear']).sum()
['AwardAmount'])/(EvaluationGroups.groupby(['Committee','FiscalYear']).count()
['AwardAmount'])).unstack().plot(
kind="bar", stacked=False, cmap="viridis", alpha=0.7, figsize=(16, 8), width=1,
title="Average Award Amount by Evaluation Group Committee")
<matplotlib.axes._subplots.AxesSubplot at 0x12065bf98>
## Mean vs Fiscal Year separated by Evaluation Group Committee
(EG.groupby(['Committee','FiscalYear']).mean()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=False, cmap="viridis", alpha=0.7, figsize=(16, 8), width=1,
title="Number of Awards by Evaluation Group Committee")
<matplotlib.axes._subplots.AxesSubplot at 0x120346da0>
## (Stacked) Sum vs Fiscal Year separated by Evaluation Group Committee
(EG.groupby(['Committee','FiscalYear']).sum()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=True, cmap="viridis", figsize=(16, 8),
title="Sum of Awards by Evaluation Group Committee")
<matplotlib.axes._subplots.AxesSubplot at 0x1232bac88>
crc = df.loc[df['ProgramNameEN']=='Canada Research Chairs']
## CRCs by Instititution
(crc.groupby(['Institution','FiscalYear']).sum()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=True, cmap="viridis", alpha=0.7, figsize=(24, 8), width=1, title="CRCs by Institution")
<matplotlib.axes._subplots.AxesSubplot at 0x128a41240>
crd = df.loc[df['ProgramNameEN'] == 'Collaborative Research and Development Grants']
## Number of Awards vs Fiscal Year separated by Evaluation Group Committee
(crd.groupby(['Institution','FiscalYear']).sum()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=True, cmap="viridis", alpha=0.7, figsize=(24, 8), width=1, title = "CRDs by Institution")
<matplotlib.axes._subplots.AxesSubplot at 0x12d2eb4a8>
strat = df.loc[df['ProgramNameEN'] == 'Strategic Projects - Group']
## Strategic Project Awards vs Fiscal Year separated by Institution
(strat.groupby(['Institution','FiscalYear']).sum()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=True, cmap="viridis", alpha=0.7, figsize=(24, 8), width=1,
title="Strategic Projects -- Group by Institution")
<matplotlib.axes._subplots.AxesSubplot at 0x12e098518>
discov = df.loc[df['ProgramNameEN'] == 'Discovery Grants Program - Individual']
## Strategic Project Awards vs Fiscal Year separated by Institution
(discov.groupby(['Institution','FiscalYear']).sum()
['AwardAmount'].unstack()).plot(
kind="bar", stacked=True, cmap="viridis", alpha=0.7, figsize=(24, 8), width=1,
title="Discover Grants -- Individual by Institution")
<matplotlib.axes._subplots.AxesSubplot at 0x12f4d0470>