# Importing the libraries that will be useful here
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
from scipy.spatial.distance import pdist
import numpy as np
import os
%matplotlib inline
# reading in the data (I saved it to my local file as CSV)
# NOTE: this is an absolute path on the author's machine; point it at your own copy of the CSV.
ed = pd.read_csv('/Users/austinbrian/dev/blog/datasets/HERD2015_RandD_by_sector.csv')
# A quick look at the top of the dataset, to make sure everything came in OK
# I cheated a little here: I went back to the CSV and eliminated commas by hand, as that was easier than handling them in code
ed.head(5)
Rank | Institution | Environmental sciences | Life sciences | Math and computer sciences | Physical sciences | Psychology | Social sciences | Sciences, nec | Engineering | All non-S&E fields | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Johns Hopkins U. | 31854 | 867715 | 171205 | 167009 | 3663 | 11034 | 54640 | 991937 | 6622 |
1 | 32 | U. Illinois, Urbana-Champaign | 7214 | 220029 | 114512 | 67182 | 17276 | 21340 | 5000 | 161458 | 25806 |
2 | 24 | Georgia Institute of Technology | 19068 | 19879 | 113353 | 47279 | 7431 | 9132 | 7645 | 533329 | 8254 |
3 | 89 | Carnegie Mellon U. | 348 | 11212 | 109026 | 14162 | 7757 | 6791 | 3479 | 89054 | 175 |
4 | 28 | U. Southern California | 20051 | 411987 | 93765 | 16924 | 9935 | 27941 | 327 | 69527 | 40574 |
# Show the row(s) whose Rank equals 2
ed[ed.Rank==2]
Rank | Institution | Environmental sciences | Life sciences | Math and computer sciences | Physical sciences | Psychology | Social sciences | Sciences, nec | Engineering | All non-S&E fields | |
---|---|---|---|---|---|---|---|---|---|---|---|
24 | 2 | U. Michigan, Ann Arbor | 14609 | 779922 | 25434 | 52449 | 21989 | 149805 | 1627 | 254505 | 68938 |
# This function summarizes the data so I can check for missing values, column types, and unique-value counts
def eda(dataframe):
    """Print a quick exploratory summary of ``dataframe``.

    Three things are written to stdout, in order:

    1. the shape of the frame,
    2. a per-column table with the number of missing values ("Missing"),
       the dtype ("Types") and the number of distinct values ("Uniques"),
    3. the result of ``dataframe.describe(include='all')``.

    Every ``print`` call is given exactly one argument, so the output is the
    same whether ``print`` is the Python 2 statement or the Python 3 function.

    Returns None; the function only prints.
    """
    print("Dataframe Shape {}".format(dataframe.shape))
    print("")
    # One row per column of the input; the "Uniques" list is built in column
    # order, which matches the index of the two Series beside it.
    table = pd.DataFrame({"Missing": dataframe.isnull().sum(),
                          "Types": dataframe.dtypes,
                          "Uniques": [dataframe[column].nunique()
                                      for column in dataframe]})
    print(table)
    print("")
    print("Describe Dataframe")
    print(dataframe.describe(include='all'))
# Defining the function doesn't run it; call it on the dataset to print the summary
eda(ed)
Dataframe Shape (640, 11) Missing Types Uniques Rank 0 int64 635 Institution 0 object 640 Environmental sciences 0 int64 408 Life sciences 0 int64 591 Math and computer sciences 0 int64 414 Physical sciences 0 int64 476 Psychology 0 int64 360 Social sciences 0 int64 397 Sciences, nec 0 int64 233 Engineering 0 int64 372 All non-S&E fields 0 int64 501 Describe Dataframe Rank Institution Environmental sciences \ count 640.000000 640 640.000000 unique NaN 640 NaN top NaN Louisiana State U., Baton Rouge NaN freq NaN 1 NaN mean 320.492188 NaN 5076.668750 std 184.885804 NaN 16247.201522 min 1.000000 NaN 0.000000 25% 160.750000 NaN 0.000000 50% 320.500000 NaN 198.500000 75% 480.250000 NaN 1997.000000 max 640.000000 NaN 169678.000000 Life sciences Math and computer sciences Physical sciences \ count 6.400000e+02 640.000000 640.000000 unique NaN NaN NaN top NaN NaN NaN freq NaN NaN NaN mean 6.065628e+04 4070.756250 7361.401563 std 1.483988e+05 13446.323577 20277.317083 min 0.000000e+00 0.000000 0.000000 25% 4.587500e+02 4.000000 30.750000 50% 2.295000e+03 208.500000 530.500000 75% 2.515100e+04 1818.000000 4464.250000 max 1.075635e+06 171205.000000 169149.000000 Psychology Social sciences Sciences, nec Engineering \ count 640.000000 640.000000 640.000000 640.000000 unique NaN NaN NaN NaN top NaN NaN NaN NaN freq NaN NaN NaN NaN mean 1850.025000 3627.956250 1690.635937 17297.278125 std 4576.223637 10928.929547 7561.262366 59004.695924 min 0.000000 0.000000 0.000000 0.000000 25% 0.000000 2.000000 0.000000 0.000000 50% 74.500000 168.500000 0.000000 208.500000 75% 1208.500000 1875.250000 262.250000 9678.250000 max 46707.000000 149805.000000 123658.000000 991937.000000 All non-S&E fields count 640.000000 unique NaN top NaN freq NaN mean 5662.442187 std 12943.654068 min 0.000000 25% 111.000000 50% 892.500000 75% 4414.750000 max 130711.000000
# Feature columns: everything from the third column on, i.e. skipping Rank and Institution
X_cols = ed.columns[2:]
# Apply matplotlib's 'fivethirtyeight' style sheet to the plots that follow
plt.style.use('fivethirtyeight')
This function plots two of the R&D fields against one another and colors the points by each school's overall R&D rank, with higher-ranked schools shown in darker green.
def plot_ed(var_X1, var_X2, c=ed.Rank, co="Greens_r"):
    """Scatter one column of ``ed`` against another on a new 8x6 figure.

    var_X1 -- name of the ``ed`` column plotted on the x-axis
    var_X2 -- name of the ``ed`` column plotted on the y-axis
    c      -- per-point values passed to ``plt.scatter`` as the color data;
              the default is ``ed.Rank`` as it was when this function was
              defined
    co     -- name of the colormap used to map ``c`` to colors

    The title and axis labels are built from the two column names.
    """
    plt.figure(figsize=(8, 6))
    x_values = ed[var_X1]
    y_values = ed[var_X2]
    plt.scatter(x_values, y_values, c=c, cmap=co, alpha=.8)
    heading = var_X1 + ' vs ' + var_X2 + ' R&D Research $'
    plt.title(heading, fontsize=18)
    plt.xlabel(var_X1, fontsize=14)
    plt.ylabel(var_X2, fontsize=14)
# A few field-vs-field scatter plots, colored by the default (overall Rank)
plot_ed('Social sciences','Math and computer sciences')
plot_ed('Engineering','Social sciences')
plot_ed('Environmental sciences','Engineering')
plot_ed('Psychology','Life sciences')
This one's interesting! It's a bit of a closer relationship.
# Let's just plot them all: a grid of pairwise plots over every feature column.
sns.pairplot(ed[X_cols])
<seaborn.axisgrid.PairGrid at 0x116f85c50>
Interesting, but not super conclusive on anything.
I'm going to use both a DBSCAN clustering algorithm and a hierarchical algorithm to group schools. As the plots here show, there aren't a lot of clusters going on for any pairs, so I don't necessarily expect to get a whole lot.
The reason for looking at a cluster analysis is that we don't necessarily have a natural classification scheme for these schools. There isn't an obvious "target" to identify them as a type, so we want to find similarities within the dataset we have.
# more libraries
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn import datasets, linear_model, metrics
# DBSCAN
# X holds the feature columns exactly as loaded; no scaling is applied here,
# so eps is measured on the unscaled values.
# NOTE(review): StandardScaler is imported above but never used in this section - confirm that is intended.
X = ed[X_cols].values
y = ed['Rank']
dbscn = DBSCAN(eps = 1000, min_samples = 4).fit(X) # played with epsilon and min samples
labels = dbscn.labels_
# Number of distinct labels, not counting -1 (the label DBSCAN gives to noise points)
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
# Tally how many rows received each DBSCAN label (including -1, if present).
# Counter does this in a single pass instead of rescanning `labels` once per
# distinct label; dict() keeps the result a plain dict, as before.
from collections import Counter

cluster_groups = dict(Counter(labels))
cluster_groups
{-1: 387, 0: 245, 1: 4, 2: 4}
# Report how the DBSCAN labelling scores.
# NOTE(review): the first three scores compare the labels against y, which is
# ed['Rank']; the eda summary above shows Rank has 635 distinct values across
# 640 rows, so it is close to one "class" per school - read these with caution.
print('Estimated number of clusters: %d' % n_clusters_)
for template, scorer in (
        ("Homogeneity: %0.3f", metrics.homogeneity_score),    # 1 is best
        ("Completeness: %0.3f", metrics.completeness_score),  # 1 is best
        ("V-measure: %0.3f", metrics.v_measure_score),        # Harmonic mean
):
    print(template % scorer(y, labels))
# The silhouette coefficient needs only the data and the labels; higher is better
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))
Estimated number of clusters: 3 Homogeneity: 0.114 Completeness: 1.000 V-measure: 0.205 Silhouette Coefficient: -0.074
This is a bad score. Our data isn't really very "dense" so a density algorithm isn't going to give us a lot of value-add here.
Let's look at it anyway.
# This is just a quick reminder of the order of our variables,
# since the plot below picks columns of X by position
X_cols
Index([u'Environmental sciences', u'Life sciences', u'Math and computer sciences', u'Physical sciences', u'Psychology', u'Social sciences', u'Sciences, nec', u'Engineering', u'All non-S&E fields'], dtype='object')
# Psychology and Life Sciences plot again, this time one color per DBSCAN label
plt.figure(figsize=(8, 6))
unique_labels = np.unique(labels)
# One Spectral colormap entry per distinct label, evenly spaced over [0, 1]
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
for label, color in zip(unique_labels, colors):
    # Rows of X that were assigned the current label
    class_member_mask = labels == label
    n = X[class_member_mask]
    # Column 4 is Psychology and column 1 is Life sciences (see the X_cols order)
    psychology, life_sciences = n[:, 4], n[:, 1]
    plt.plot(psychology, life_sciences, 'o',
             markerfacecolor=color, markersize=8, alpha=.3)
/Users/austinbrian/anaconda/lib/python2.7/site-packages/matplotlib/lines.py:1206: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future. if self._markerfacecolor != fc:
Yikes. All of our clusters but one sit right around the origin - and the lighter and darker pinks don't actually distinguish clusters; they come from the transparency setting I added so that I could see where points overlap.
# libraries
from scipy.cluster.hierarchy import dendrogram, linkage, cophenet
from scipy.spatial.distance import pdist
# We'll implement the actual clustering algorithm using the ward method:
Z = linkage(X, 'ward')
# We can calculate the cophenetic correlation coefficient to see how well our algorithm has measured the distances between the points:
# cophenet returns the coefficient first, followed by the cophenetic distances.
c, coph_dists = cophenet(Z, pdist(X))
c
0.85448874350133697
Not bad!
# Dendrogram demonstrates hierarchy
plt.title('Dendrogram')
plt.xlabel('Index Numbers')
plt.ylabel('Distance')
# Draw the tree for the linkage matrix Z, with rotated, small leaf labels
dendrogram(
Z,
leaf_rotation=90.,
leaf_font_size=8.,)
plt.show()
It looks like there is some good separation of clusters right about the 1,000,000 point, so it's a good place to set a max value.
# We cut the tree at a maximum distance of 1,000,000 and use the fcluster function from scipy.cluster.hierarchy, which will return our cluster IDs.
max_dist = 1000000
clusters = fcluster(Z, max_dist, criterion='distance')
clusters
# Let's plot our data and assign the class labels as the color:
# (column 4 of X is Psychology on the x-axis, column 1 is Life sciences on the y-axis)
plt.figure(figsize=(8,6))
plt.scatter(X[:,4], X[:,1], c=clusters, cmap='prism')
plt.show()
Very cool! There's some identifiable separation here. We can plot more maps this same way, but first let's clean up this one.
# Same kind of scatter via plot_ed, so the axes get titles and labels;
# points are colored by hierarchical cluster id instead of the default Rank
plot_ed('Psychology','Life sciences',c=clusters, co='prism')
plot_ed('Environmental sciences','Life sciences',c=clusters, co='prism')
plot_ed('Math and computer sciences','Physical sciences',c=clusters, co='prism')
These seem like useful clusters, but it would be more useful if we could show which institutions were in which cluster.
# Pair every institution name with its hierarchical cluster id.
# The pairing is the same no matter how often it is computed, so it is built
# once rather than inside a loop over `clusters`. Wrapping it in list() keeps
# it indexable (clust_zip[0][1]) and re-iterable even where zip() is lazy.
clust_zip = list(zip(ed.Institution, clusters))
clust_zip
[('Johns Hopkins U.', 6), ('U. Illinois, Urbana-Champaign', 4), ('Georgia Institute of Technology', 4), ('Carnegie Mellon U.', 2), ('U. Southern California', 3), ('Massachusetts Institute of Technology', 4), ('U. Texas, Austin', 2), ('Pennsylvania State U., University Park and Hershey Medical Center', 4), ('U. Maryland, College Park', 2), ('U. California, San Diego', 5), ('Brown U.', 2), ('U. Utah', 3), ('North Carolina State U.', 4), ('U. Minnesota, Twin Cities', 5), ('Indiana U., Bloomington', 3), ('U. Wisconsin-Madison', 5), ('U. Chicago', 3), ('Ohio State U.', 5), ('Stanford U.', 5), ('U. California, Los Angeles', 5), ('Virginia Polytechnic Institute and State U.', 4), ('Purdue U., West Lafayette', 4), ('Columbia U. in the City of New York', 5), ('Rutgers, State U. New Jersey, New Brunswick', 3), ('U. Michigan, Ann Arbor', 5), ('U. North Carolina, Chapel Hill', 5), ('U. Alabama, Huntsville', 1), ('U. Washington, Seattle', 5), ('New York U.', 3), ('U. Central Florida', 2), ('U. Tennessee, Knoxville', 2), ('Cornell U.', 5), ('U. Hawaii, Manoa', 2), ('U. Massachusetts, Amherst', 2), ('Texas A&M U., College Station and Health Science Center', 4), ('Michigan State U.', 4), ('Arizona State U.', 2), ('Princeton U.', 2), ('Iowa State U.', 2), ('SUNY, Stony Brook U.', 2), ('Duke U.', 5), ('U. Texas M. D. Anderson Cancer Center', 5), ('U. California, Irvine', 2), ('U. Pennsylvania', 5), ('Rice U.', 2), ('Harvard U.', 5), ('U. California, Berkeley', 4), ('George Mason U.', 1), ('U. Illinois, Chicago', 3), ('U. Alabama, Tuscaloosa', 1), ('SUNY, U. Buffalo', 3), ('Florida International U.', 2), ('U. Nebraska, Lincoln', 2), ('Rensselaer Polytechnic Institute', 2), ('U. Notre Dame', 2), ('U. California, Santa Barbara', 2), ('U. California, Davis', 5), ('Yale U.', 5), ('Mississippi State U.', 2), ('Northeastern U.', 1), ('Florida State U.', 2), ('U. Arizona', 4), ('U. Georgia', 3), ('New Jersey Institute of Technology', 2), ('California Institute of Technology', 2), ('U. 
Louisiana, Lafayette', 1), ('U. Colorado Boulder', 2), ('U. Florida', 5), ('U. Pittsburgh, Pittsburgh', 5), ('Oregon State U.', 2), ('Rockefeller U.', 3), ('U. Texas, Dallas', 1), ('U. Maryland, Baltimore County', 1), ('U. South Florida, Tampa', 3), ('Boston U.', 3), ('Clemson U.', 2), ('U. Virginia, Charlottesville', 3), ('Naval Postgraduate School', 1), ('U. Houston', 2), ('George Washington U.', 2), ('SUNY, U. Albany', 2), ('Air Force Institute of Technology', 1), ('Texas Tech U.', 2), ('SUNY, Binghamton U.', 1), ('U. Texas, El Paso', 1), ('Colorado State U., Fort Collins', 2), ('U. Delaware', 2), ('San Diego State U.', 1), ('Washington State U.', 2), ('Drexel U.', 2), ('U. Texas, Arlington', 1), ('Worcester Polytechnic Institute', 1), ('Kansas State U.', 2), ('U. North Texas, Denton', 1), ('U. North Carolina, Charlotte', 1), ('Tufts U.', 2), ('U. New Mexico', 2), ('Dartmouth C.', 2), ('U. California, Riverside', 2), ('Louisiana State U., Baton Rouge', 2), ('U. Connecticut', 2), ('DePaul U.', 1), ('Wright State U.', 1), ('Temple U.', 2), ('Syracuse U.', 1), ('U. Oregon', 1), ('North Dakota State U.', 2), ('Washington U., Saint Louis', 5), ('U. Missouri, Columbia', 2), ('U. Texas, San Antonio', 1), ('Oklahoma State U., Stillwater', 2), ('Rochester Institute of Technology', 1), ('U. Massachusetts, Lowell', 1), ('Wayne State U.', 2), ('U. California, Santa Cruz', 2), ('U.S. Air Force Academy', 1), ('Louisiana Tech U.', 1), ('U. South Carolina, Columbia', 2), ('Michigan Technological U.', 1), ('U. Kansas', 2), ('Indiana U.-Purdue U., Indianapolis', 1), ('U. Tulsa', 1), ('U. Rochester', 3), ('U. Memphis', 1), ('U. Kentucky', 3), ('Brandeis U.', 1), ('Georgia State U.', 1), ('Boise State U.', 1), ('Illinois Institute of Technology', 1), ('U. Massachusetts, Boston', 1), ('Utah State U.', 2), ('U. Idaho', 2), ('North Carolina Agricultural and Technical State U.', 1), ('Brigham Young U., Provo', 1), ('U. Iowa', 3), ('U. 
Nebraska, Omaha', 1), ('Toyota Technological Institute, Chicago', 1), ('Georgetown U.', 2), ('U. Dayton', 2), ('Kent State U.', 1), ('U. Miami', 3), ('Stevens Institute of Technology', 1), ('Texas State U.', 1), ('California State U., San Bernardino', 1), ('Northwestern U.', 5), ('Portland State U.', 1), ('Emory U.', 5), ('C. of William and Mary and Virginia Institute of Marine Science', 1), ('CUNY, City C.', 1), ('Missouri U. of Science and Technology', 1), ('New Mexico State U.', 2), ('Tulane U.', 2), ('Old Dominion U.', 1), ('Jackson State U.', 1), ('U. Oklahoma, Norman and Health Science Center', 2), ('Southern Methodist U.', 1), ('U. Nevada, Reno', 1), ('U. Alaska, Fairbanks', 1), ('San Francisco State U.', 1), ('U. Wisconsin-Milwaukee', 1), ('Virginia Commonwealth U.', 2), ('U. Cincinnati', 3), ('U. New Hampshire', 1), ('U. Missouri, Kansas City', 1), ('U. Wyoming', 1), ('U.S. Naval Academy', 1), ('Texas A&M U.-Corpus Christi', 1), ('U. North Carolina, general administration', 1), ('West Virginia U.', 2), ('U. Arkansas, Little Rock', 1), ('Harvey Mudd C.', 1), ('U. California, Office of the President', 1), ('U. Colorado Colorado Springs', 1), ('Lehigh U.', 1), ('U.S. Military Academy', 1), ('U. New Orleans', 1), ('American U.', 1), ('Delaware State U.', 1), ('Montana State U., Bozeman', 1), ('Baylor U.', 1), ('Villanova U.', 1), ('Bryn Mawr C.', 1), ('U. Massachusetts, Dartmouth', 1), ('U. Vermont', 2), ('Vanderbilt U.', 5), ('Alabama A&M U.', 1), ('Northern Arizona U.', 1), ('CUNY, Queens C.', 1), ('Florida Institute of Technology', 1), ('U. South Alabama', 1), ('U. Nevada, Las Vegas', 1), ('Gallaudet U.', 1), ('Bowie State U.', 1), ('Marquette U.', 1), ('U. Tennessee, Chattanooga', 1), ('California Polytechnic State U., San Luis Obispo', 1), ('U. 
Alabama, Birmingham', 5), ('Boston C.', 1), ('SUNY, Polytechnic Institute', 4), ('Tennessee Technological U.', 1), ('Tuskegee U.', 1), ('Desert Research Institute', 1), ('Clarkson U.', 1), ('Ball State U.', 1), ('Florida Atlantic U.', 1), ('Case Western Reserve U.', 3), ('Creighton U.', 1), ('Rutgers, State U. New Jersey, Newark', 1), ('U. Maine', 1), ('Howard U.', 1), ('U. Puerto Rico, Mayaguez', 1), ('California State U., Northridge', 1), ('Dakota State U.', 1), ('Elizabeth City State U.', 1), ('U. California, Merced', 1), ('Southern Illinois U., Carbondale', 1), ('Illinois State U.', 1), ('Norfolk State U.', 1), ('CUNY, system office', 1), ('Smith C.', 1), ('Western Washington U.', 1), ('U. Puerto Rico, Rio Piedras', 1), ('Williams C.', 1), ('South Dakota State U.', 1), ('Texas Southern U.', 1), ('Western Michigan U. and Homer Stryker M.D. School of Medicine', 1), ('Morgan State U.', 1), ('U. Montana, Missoula', 1), ('U. Akron', 1), ('Hampton U.', 1), ('Florida A&M U.', 1), ('U. Colorado Denver and Anschutz Medical Campus', 3), ('Miami U.', 1), ('California State U., Bakersfield', 1), ('U. South Dakota', 1), ('U. Texas, Brownsville', 1), ('Sam Houston State U.', 1), ('C. Charleston', 1), ('CUNY, Hunter C.', 1), ('U. Texas Pan American', 1), ('Oakland U.', 1), ('Stephen F. Austin State U.', 1), ('Loyola U., Chicago', 1), ('Willamette U.', 1), ('Northern Illinois U.', 1), ('Wellesley C.', 1), ('U. Louisville', 2), ('Fordham U.', 1), ('Towson U.', 1), ('U. Central Arkansas', 1), ('U. Mississippi', 2), ('U. Minnesota, Duluth', 1), ('Calvin C.', 1), ('California State U., San Marcos', 1), ('U. Houston-Downtown', 1), ('Tennessee State U.', 1), ('Pennsylvania State U., Harrisburg', 1), ('Pace U.', 1), ('U. Rhode Island', 1), ('Columbia U., Teachers C.', 1), ('California State U., Monterey Bay', 1), ('U. Southern Mississippi', 1), ('U. Denver', 1), ('Trinity C., Hartford', 1), ('Colorado School of Mines', 1), ('U. Metropolitana', 1), ('Idaho State U.', 1), ('U. 
Southern Maine', 1), ('U. Northern Colorado', 1), ('CUNY, John Jay C. of Criminal Justice', 1), ('Montclair State U.', 1), ('Prairie View A&M U.', 1), ('Pomona C.', 1), ('Carleton C.', 1), ('Duquesne U.', 1), ('California State U., Fresno', 1), ('Vassar C.', 1), ('Xavier U. Louisiana', 1), ('St. Olaf C.', 1), ('Cleveland State U.', 1), ('U. Arkansas, Pine Bluff', 1), ('Marist C.', 1), ('San Jose State U.', 1), ('CUNY, Brooklyn C.', 1), ('California State U., Sacramento', 1), ('U. North Carolina, Wilmington', 1), ('Fayetteville State U.', 1), ('New School', 1), ('Fairfield U.', 1), ('Mount Holyoke C.', 1), ('Alcorn State U.', 1), ('Wesleyan U.', 1), ('Auburn U., Auburn', 2), ('Lewis and Clark C.', 1), ('Rutgers, State U. New Jersey, Camden', 1), ('U. North Carolina, Greensboro', 1), ('Arkansas State U., Jonesboro', 1), ('Saint Louis U.', 1), ('U. North Florida', 1), ('Central Connecticut State U.', 1), ('Central Michigan U.', 1), ('CUNY, C. Staten Island', 1), ('Trinity U.', 1), ('Wake Forest U.', 2), ('Embry-Riddle Aeronautical U.', 1), ('Southern Illinois U., Edwardsville', 1), ('U. Washington, Bothell', 1), ('Southern U. and A&M C., Baton Rouge', 1), ('Amherst C.', 1), ('California State U., Channel Islands', 1), ('U. North Dakota', 1), ('Lafayette C.', 1), ('Purdue U., Calumet', 1), ('Loyola Marymount U.', 1), ('Clark Atlanta U.', 1), ('Spelman C.', 1), ('CUNY, Graduate Center', 1), ('Kean U.', 1), ('Southern Connecticut State U.', 1), ('Texas A&M International U.', 1), ('New Mexico Institute of Mining and Technology', 1), ('James Madison U.', 1), ('Virginia State U.', 1), ('U. Missouri, Saint Louis', 1), ('Salisbury U.', 1), ('Colgate U.', 1), ('Ohio U.', 1), ('West Chester U. Pennsylvania', 1), ('U. Houston-Clear Lake', 1), ('Texas A&M U.-Commerce', 1), ('Appalachian State U.', 1), ('U. Washington, Tacoma', 1), ('Pennsylvania State U., Behrend', 1), ('Georgia Southern U.', 1), ('U. 
Hawaii, Hilo', 1), ('East Tennessee State U.', 1), ('CUNY, Lehman C.', 1), ('Lamar U.', 1), ('Reed C.', 1), ('U. Wisconsin-Stevens Point', 1), ('New York Institute of Technology', 1), ('Bowdoin C.', 1), ('Barnard C.', 1), ('Claremont Graduate U.', 1), ('Macalester C.', 1), ('Bowling Green State U.', 1), ('Seattle U.', 1), ('Oberlin C.', 1), ('U. Arkansas, Fayetteville', 2), ('U. Central Oklahoma', 1), ('West Virginia State U.', 1), ('Kennesaw State U.', 1), ('Elon U.', 1), ('U. South Carolina, Aiken', 1), ('Wichita State U.', 1), ('Azusa Pacific U.', 1), ('Bates C.', 1), ('SUNY, C. of Environmental Science and Forestry', 1), ('West Texas A&M U.', 1), ('Benedict C.', 1), ('Morehouse C.', 1), ('U. Detroit Mercy', 1), ('Middle Tennessee State U.', 1), ('Valparaiso U.', 1), ('Grinnell C.', 1), ('Swarthmore C.', 1), ('Winthrop U.', 1), ('Grand Valley State U.', 1), ('C. of Saint Benedict', 1), ('California State U., Dominguez Hills', 1), ('Texas Christian U.', 1), ('C. Wooster', 1), ('U. West Florida', 1), ('Bradley U.', 1), ('Rowan U.', 1), ('Norwich U.', 1), ('U. Hartford', 1), ('La Salle U.', 1), ('U. del Turabo', 1), ('Siena C.', 1), ('Lincoln U., Jefferson City', 1), ("Saint John's U., Collegeville", 1), ('Bucknell U.', 1), ('Shaw U.', 1), ('Sonoma State U.', 1), ('Indiana U.-Purdue U., Fort Wayne', 1), ('U. Wisconsin-La Crosse', 1), ('U. San Francisco', 1), ('CUNY, Baruch C.', 1), ('U. Wisconsin-Oshkosh', 1), ('Kettering U.', 1), ('California State U., Long Beach', 1), ('Middlebury C.', 1), ('U. South Carolina, Beaufort', 1), ('Davidson C.', 1), ('Minnesota State U., Mankato', 1), ('Wiley C.', 1), ('East Central U.', 1), ('U. Baltimore', 1), ('Ithaca C.', 1), ('South Dakota School of Mines and Technology', 1), ('Western Kentucky U.', 1), ('South Carolina State U.', 1), ('Lawrence Technological U.', 1), ('Eastern Michigan U.', 1), ('Union C., Schenectady', 1), ("Saint Michael's C.", 1), ('U. Nebraska, Kearney', 1), ('U. 
Alaska, Anchorage', 1), ('Colorado C.', 1), ('U. West Georgia', 1), ('Florida Gulf Coast U.', 1), ('St. Cloud State U.', 1), ('U. Toledo', 1), ('Fort Valley State U.', 1), ("St. John's U., Manhattan", 1), ('Haverford C.', 1), ('U. Wisconsin-Green Bay', 1), ('Fisk U.', 1), ('U. of the Pacific', 1), ('U. Minnesota, Morris', 1), ('Missouri State U.', 1), ('Pepperdine U.', 1), ('CUNY, Medgar Evers C.', 1), ('Quinnipiac U.', 1), ('Hamilton C.', 1), ('East Carolina U.', 1), ('Hofstra U.', 1), ('U. Texas, Tyler', 1), ('Furman U.', 1), ('Colby C.', 1), ('SUNY, Geneseo', 1), ('Gonzaga U.', 1), ('U. Wisconsin-Platteville', 1), ('Hope C.', 1), ('California State U., Chico', 1), ('Claflin U.', 1), ('CUNY, York C.', 1), ('Suffolk U.', 1), ('Kentucky State U.', 1), ('California State U., Fullerton', 1), ('Skidmore C.', 1), ('Western Illinois U.', 1), ('Murray State U.', 1), ('Northern Kentucky U.', 1), ('McNeese State U.', 1), ('U. San Diego', 1), ('Savannah State U.', 1), ('Rider U.', 1), ('California State Polytechnic U., Pomona', 1), ('Indiana U., South Bend', 1), ('U. Richmond', 1), ('Eastern Connecticut State U.', 1), ('U. of the District of Columbia', 1), ('Marshall U.', 1), ('U. Northern Iowa', 1), ('Niagara U.', 1), ('Nova Southeastern U.', 1), ('Rhode Island School of Design', 1), ('U. of Mary Washington', 1), ('Chapman U.', 1), ('U. Michigan, Dearborn', 1), ('Roger Williams U.', 1), ('Hawaii Pacific U.', 1), ('Jacksonville State U.', 1), ("Texas Woman's U.", 1), ('Purdue U., North Central', 1), ('Central State U.', 1), ('Albany C. of Pharmacy and Health Sciences', 1), ('Franklin and Marshall C.', 1), ('Pacific U.', 1), ('Washington and Lee U.', 1), ('Saginaw Valley State U.', 1), ('Western Carolina U.', 1), ('Dickinson C.', 1), ("Saint Joseph's U.", 1), ('Coastal Carolina U.', 1), ('Wheaton C., Wheaton', 1), ('U. North Carolina, Asheville', 1), ('Youngstown State U.', 1), ('SUNY, C. Brockport', 1), ('Sewanee: U. 
of the South', 1), ('Santa Clara U.', 1), ('Lake Superior State U.', 1), ('U. California, San Francisco', 5), ('Baylor C. of Medicine', 5), ('Icahn School of Medicine at Mt. Sinai', 5), ('U. Texas Southwestern Medical Center', 5), ('U. Maryland, Baltimore', 3), ('Scripps Research Institute', 3), ('Uniformed Services U. of the Health Sciences', 3), ('Oregon Health and Science U.', 3), ('Yeshiva U.', 3), ('U. Massachusetts, Medical School', 3), ('Medical U. South Carolina', 3), ('U. Texas Health Science Center, Houston', 3), ('Woods Hole Oceanographic Institution', 2), ('Medical C. Wisconsin', 2), ('U. Texas Medical Branch', 2), ('U. Texas Health Science Center, San Antonio', 2), ('U. Nebraska, Medical Center', 2), ('U. Arkansas for Medical Sciences', 2), ('Thomas Jefferson U.', 2), ('Cold Spring Harbor Laboratory', 2), ('Rush U.', 2), ('Georgia Regents U.\t\t\t\t\t\t\t\t\t', 2), ('U. Tennessee, Knoxville, Institute of Agriculture', 1), ('U. Tennessee, Health Science Center', 2), ('U. Maryland, Center for Environmental Science', 1), ('Louisiana State U., Health Sciences Center \xe2\x80\x93 New Orleans', 1), ('U. North Texas, Health Science Center', 1), ('U. Puerto Rico, Medical Sciences Campus', 1), ('Eastern Virginia Medical School', 1), ('Van Andel Institute', 1), ('Texas Tech U., Health Sciences Center', 1), ('Morehouse School of Medicine', 1), ('Mercer U.', 1), ('SUNY, Upstate Medical U.', 1), ('SUNY, Downstate Medical Center', 1), ('Loma Linda U.', 1), ('Catholic U. of America', 1), ('Louisiana State U., Health Sciences Center - Shreveport', 1), ('Texas A&M U.-Kingsville', 1), ('U. of the Virgin Islands', 1), ('Texas Tech U., Health Sciences Center, El Paso', 1), ('Albany Medical C.', 1), ('New York Medical C.', 1), ('Northeast Ohio Medical U.', 1), ('Rosalind Franklin U. of Medicine and Science', 1), ('Charles R. Drew U. of Medicine and Science', 1), ('Meharry Medical C.', 1), ('North Carolina Central U.', 1), ('U. 
Texas Health Science Center, Tyler', 1), ('Montana Tech of U. Montana', 1), ('Humboldt State U.', 1), ('Rhode Island C.', 1), ('Tarleton State U.', 1), ('U. Maryland, Eastern Shore', 1), ("St. Edward's U.", 1), ('Dillard U.', 1), ('Midwestern U.', 1), ('Langston U.', 1), ('Ponce Health Sciences U.', 1), ('U. New England', 1), ('U. South Florida, Saint Petersburg', 1), ('Seton Hall U.', 1), ('Clark U.', 1), ('U. Central del Caribe', 1), ('U. Guam', 1), ('National Defense U.', 1), ('Alfred U.', 1), ('U. Massachusetts, central office', 1), ('California State U., Los Angeles', 1), ('Western U. of Health Sciences', 1), ('Southern U. and A&M C., Agricultural Research and Extension Center', 1), ('A. T. Still U.', 1), ('Milwaukee School of Engineering', 1), ('U. Oklahoma, Tulsa', 1), ('U. of the Sciences Philadelphia', 1), ('MGH Institute of Health Professions', 1), ('SUNY, C. of Optometry', 1), ('Roseman U. of Health Sciences', 1), ('Mills C.', 1), ('Touro U., Vallejo', 1), ('Memorial Sloan Kettering Cancer Center, Louis V. Gerstner Jr. Graduate S. of Biomedical Sciences', 1), ('Fuller Theological Seminary', 1), ('Eastern Washington U.', 1), ('Plymouth State U.', 1), ('Tougaloo C.', 1), ('Southeastern Louisiana U.', 1), ('Naval War C.', 1), ('Central Washington U.', 1), ('Philadelphia C. of Osteopathic Medicine', 1), ('Northwest Indian C.', 1), ('Black Hills State U.', 1), ('Erikson Institute', 1), ('SUNY, Buffalo State', 1), ('National U.', 1), ('Alabama State U.', 1), ('Edward Via C. of Osteopathic Medicine', 1), ('Pittsburg State U.', 1), ('Sul Ross State U.', 1), ('Commonwealth Medical C.', 1), ('Franklin W. Olin C. of Engineering', 1), ('Austin Peay State U.', 1), ('U. Illinois, Springfield', 1), ('Oregon Institute of Technology', 1), ('Mercyhurst U.', 1), ('Connecticut C.', 1), ('Grambling State U.', 1), ('Morehead State U.', 1), ('U. New Haven', 1), ('U. Texas, Permian Basin', 1), ('Keck Graduate Institute', 1), ('Wheeling Jesuit U.', 1), ('Palmer C. 
of Chiropractic, Davenport', 1), ('Eastern Kentucky U.', 1), ('U. Louisiana, Monroe', 1), ('Oklahoma State U., Center for Health Sciences', 1), ('U. South Florida, Sarasota-Manatee', 1), ('Nicholls State U.', 1), ('U. Alaska, Southeast', 1), ('Occidental C.', 1), ('California Maritime Academy', 1), ('Indiana State U.', 1), ('U. Puerto Rico, Cayey', 1), ('Maine Maritime Academy', 1), ('Marshall B. Ketchum U.', 1), ('Stockton U.', 1), ('U. Tampa', 1), ('Bastyr U.', 1), ('Albany State U.', 1), ('American Samoa Community C.', 1), ('Providence C.', 1), ('Emerson C.', 1), ('Salus U.', 1), ('Alaska Pacific U.', 1), ('Christopher Newport U.', 1), ('Augsburg C.', 1), ('Salish Kootenai C.', 1), ('U. del Este', 1), ('La Sierra U.', 1), ('SUNY, C. Plattsburgh', 1), ('Heidelberg U.', 1), ('SUNY, Farmingdale State C.', 1), ('Seattle Pacific U.', 1), ('California State U., Stanislaus', 1), ('New England C. of Optometry', 1), ('U. Western States', 1), ('U. Houston system administration', 1), ('Keene State C.', 1), ('Hobart and William Smith Colleges', 1), ('U. of the Incarnate Word', 1), ('U. Puerto Rico, Humacao', 1), ('Augustana C., Sioux Falls', 1), ('Barry U.', 1), ('CUNY, Advanced Science Research Center', 1), ('U. Redlands', 1), ('Doane C.', 1), ('Florida Polytechnic U.', 1)]
# Cluster id paired with the first institution in the list
clust_zip[0][1]
6
# Group institution names by cluster id: {cluster id: [institution, ...]}.
# The ids are taken from the data itself rather than a fixed 1..6 range, so
# the grouping stays complete if the distance cut-off (and therefore the
# number of clusters) changes. One pass over the pairs replaces a rescan of
# the whole list per cluster.
grouped = {}
for institution, cluster_id in clust_zip:
    # int() keeps the keys plain Python ints, as they were with range()
    grouped.setdefault(int(cluster_id), []).append(institution)
# Rebuild in ascending id order so the display reads 1, 2, 3, ...
cluster_dict = {cluster_id: grouped[cluster_id] for cluster_id in sorted(grouped)}
cluster_dict
{1: ['U. Alabama, Huntsville', 'George Mason U.', 'U. Alabama, Tuscaloosa', 'Northeastern U.', 'U. Louisiana, Lafayette', 'U. Texas, Dallas', 'U. Maryland, Baltimore County', 'Naval Postgraduate School', 'Air Force Institute of Technology', 'SUNY, Binghamton U.', 'U. Texas, El Paso', 'San Diego State U.', 'U. Texas, Arlington', 'Worcester Polytechnic Institute', 'U. North Texas, Denton', 'U. North Carolina, Charlotte', 'DePaul U.', 'Wright State U.', 'Syracuse U.', 'U. Oregon', 'U. Texas, San Antonio', 'Rochester Institute of Technology', 'U. Massachusetts, Lowell', 'U.S. Air Force Academy', 'Louisiana Tech U.', 'Michigan Technological U.', 'Indiana U.-Purdue U., Indianapolis', 'U. Tulsa', 'U. Memphis', 'Brandeis U.', 'Georgia State U.', 'Boise State U.', 'Illinois Institute of Technology', 'U. Massachusetts, Boston', 'North Carolina Agricultural and Technical State U.', 'Brigham Young U., Provo', 'U. Nebraska, Omaha', 'Toyota Technological Institute, Chicago', 'Kent State U.', 'Stevens Institute of Technology', 'Texas State U.', 'California State U., San Bernardino', 'Portland State U.', 'C. of William and Mary and Virginia Institute of Marine Science', 'CUNY, City C.', 'Missouri U. of Science and Technology', 'Old Dominion U.', 'Jackson State U.', 'Southern Methodist U.', 'U. Nevada, Reno', 'U. Alaska, Fairbanks', 'San Francisco State U.', 'U. Wisconsin-Milwaukee', 'U. New Hampshire', 'U. Missouri, Kansas City', 'U. Wyoming', 'U.S. Naval Academy', 'Texas A&M U.-Corpus Christi', 'U. North Carolina, general administration', 'U. Arkansas, Little Rock', 'Harvey Mudd C.', 'U. California, Office of the President', 'U. Colorado Colorado Springs', 'Lehigh U.', 'U.S. Military Academy', 'U. New Orleans', 'American U.', 'Delaware State U.', 'Montana State U., Bozeman', 'Baylor U.', 'Villanova U.', 'Bryn Mawr C.', 'U. Massachusetts, Dartmouth', 'Alabama A&M U.', 'Northern Arizona U.', 'CUNY, Queens C.', 'Florida Institute of Technology', 'U. South Alabama', 'U. 
Nevada, Las Vegas', 'Gallaudet U.', 'Bowie State U.', 'Marquette U.', 'U. Tennessee, Chattanooga', 'California Polytechnic State U., San Luis Obispo', 'Boston C.', 'Tennessee Technological U.', 'Tuskegee U.', 'Desert Research Institute', 'Clarkson U.', 'Ball State U.', 'Florida Atlantic U.', 'Creighton U.', 'Rutgers, State U. New Jersey, Newark', 'U. Maine', 'Howard U.', 'U. Puerto Rico, Mayaguez', 'California State U., Northridge', 'Dakota State U.', 'Elizabeth City State U.', 'U. California, Merced', 'Southern Illinois U., Carbondale', 'Illinois State U.', 'Norfolk State U.', 'CUNY, system office', 'Smith C.', 'Western Washington U.', 'U. Puerto Rico, Rio Piedras', 'Williams C.', 'South Dakota State U.', 'Texas Southern U.', 'Western Michigan U. and Homer Stryker M.D. School of Medicine', 'Morgan State U.', 'U. Montana, Missoula', 'U. Akron', 'Hampton U.', 'Florida A&M U.', 'Miami U.', 'California State U., Bakersfield', 'U. South Dakota', 'U. Texas, Brownsville', 'Sam Houston State U.', 'C. Charleston', 'CUNY, Hunter C.', 'U. Texas Pan American', 'Oakland U.', 'Stephen F. Austin State U.', 'Loyola U., Chicago', 'Willamette U.', 'Northern Illinois U.', 'Wellesley C.', 'Fordham U.', 'Towson U.', 'U. Central Arkansas', 'U. Minnesota, Duluth', 'Calvin C.', 'California State U., San Marcos', 'U. Houston-Downtown', 'Tennessee State U.', 'Pennsylvania State U., Harrisburg', 'Pace U.', 'U. Rhode Island', 'Columbia U., Teachers C.', 'California State U., Monterey Bay', 'U. Southern Mississippi', 'U. Denver', 'Trinity C., Hartford', 'Colorado School of Mines', 'U. Metropolitana', 'Idaho State U.', 'U. Southern Maine', 'U. Northern Colorado', 'CUNY, John Jay C. of Criminal Justice', 'Montclair State U.', 'Prairie View A&M U.', 'Pomona C.', 'Carleton C.', 'Duquesne U.', 'California State U., Fresno', 'Vassar C.', 'Xavier U. Louisiana', 'St. Olaf C.', 'Cleveland State U.', 'U. 
Arkansas, Pine Bluff', 'Marist C.', 'San Jose State U.', 'CUNY, Brooklyn C.', 'California State U., Sacramento', 'U. North Carolina, Wilmington', 'Fayetteville State U.', 'New School', 'Fairfield U.', 'Mount Holyoke C.', 'Alcorn State U.', 'Wesleyan U.', 'Lewis and Clark C.', 'Rutgers, State U. New Jersey, Camden', 'U. North Carolina, Greensboro', 'Arkansas State U., Jonesboro', 'Saint Louis U.', 'U. North Florida', 'Central Connecticut State U.', 'Central Michigan U.', 'CUNY, C. Staten Island', 'Trinity U.', 'Embry-Riddle Aeronautical U.', 'Southern Illinois U., Edwardsville', 'U. Washington, Bothell', 'Southern U. and A&M C., Baton Rouge', 'Amherst C.', 'California State U., Channel Islands', 'U. North Dakota', 'Lafayette C.', 'Purdue U., Calumet', 'Loyola Marymount U.', 'Clark Atlanta U.', 'Spelman C.', 'CUNY, Graduate Center', 'Kean U.', 'Southern Connecticut State U.', 'Texas A&M International U.', 'New Mexico Institute of Mining and Technology', 'James Madison U.', 'Virginia State U.', 'U. Missouri, Saint Louis', 'Salisbury U.', 'Colgate U.', 'Ohio U.', 'West Chester U. Pennsylvania', 'U. Houston-Clear Lake', 'Texas A&M U.-Commerce', 'Appalachian State U.', 'U. Washington, Tacoma', 'Pennsylvania State U., Behrend', 'Georgia Southern U.', 'U. Hawaii, Hilo', 'East Tennessee State U.', 'CUNY, Lehman C.', 'Lamar U.', 'Reed C.', 'U. Wisconsin-Stevens Point', 'New York Institute of Technology', 'Bowdoin C.', 'Barnard C.', 'Claremont Graduate U.', 'Macalester C.', 'Bowling Green State U.', 'Seattle U.', 'Oberlin C.', 'U. Central Oklahoma', 'West Virginia State U.', 'Kennesaw State U.', 'Elon U.', 'U. South Carolina, Aiken', 'Wichita State U.', 'Azusa Pacific U.', 'Bates C.', 'SUNY, C. of Environmental Science and Forestry', 'West Texas A&M U.', 'Benedict C.', 'Morehouse C.', 'U. Detroit Mercy', 'Middle Tennessee State U.', 'Valparaiso U.', 'Grinnell C.', 'Swarthmore C.', 'Winthrop U.', 'Grand Valley State U.', 'C. 
of Saint Benedict', 'California State U., Dominguez Hills', 'Texas Christian U.', 'C. Wooster', 'U. West Florida', 'Bradley U.', 'Rowan U.', 'Norwich U.', 'U. Hartford', 'La Salle U.', 'U. del Turabo', 'Siena C.', 'Lincoln U., Jefferson City', "Saint John's U., Collegeville", 'Bucknell U.', 'Shaw U.', 'Sonoma State U.', 'Indiana U.-Purdue U., Fort Wayne', 'U. Wisconsin-La Crosse', 'U. San Francisco', 'CUNY, Baruch C.', 'U. Wisconsin-Oshkosh', 'Kettering U.', 'California State U., Long Beach', 'Middlebury C.', 'U. South Carolina, Beaufort', 'Davidson C.', 'Minnesota State U., Mankato', 'Wiley C.', 'East Central U.', 'U. Baltimore', 'Ithaca C.', 'South Dakota School of Mines and Technology', 'Western Kentucky U.', 'South Carolina State U.', 'Lawrence Technological U.', 'Eastern Michigan U.', 'Union C., Schenectady', "Saint Michael's C.", 'U. Nebraska, Kearney', 'U. Alaska, Anchorage', 'Colorado C.', 'U. West Georgia', 'Florida Gulf Coast U.', 'St. Cloud State U.', 'U. Toledo', 'Fort Valley State U.', "St. John's U., Manhattan", 'Haverford C.', 'U. Wisconsin-Green Bay', 'Fisk U.', 'U. of the Pacific', 'U. Minnesota, Morris', 'Missouri State U.', 'Pepperdine U.', 'CUNY, Medgar Evers C.', 'Quinnipiac U.', 'Hamilton C.', 'East Carolina U.', 'Hofstra U.', 'U. Texas, Tyler', 'Furman U.', 'Colby C.', 'SUNY, Geneseo', 'Gonzaga U.', 'U. Wisconsin-Platteville', 'Hope C.', 'California State U., Chico', 'Claflin U.', 'CUNY, York C.', 'Suffolk U.', 'Kentucky State U.', 'California State U., Fullerton', 'Skidmore C.', 'Western Illinois U.', 'Murray State U.', 'Northern Kentucky U.', 'McNeese State U.', 'U. San Diego', 'Savannah State U.', 'Rider U.', 'California State Polytechnic U., Pomona', 'Indiana U., South Bend', 'U. Richmond', 'Eastern Connecticut State U.', 'U. of the District of Columbia', 'Marshall U.', 'U. Northern Iowa', 'Niagara U.', 'Nova Southeastern U.', 'Rhode Island School of Design', 'U. of Mary Washington', 'Chapman U.', 'U. 
Michigan, Dearborn', 'Roger Williams U.', 'Hawaii Pacific U.', 'Jacksonville State U.', "Texas Woman's U.", 'Purdue U., North Central', 'Central State U.', 'Albany C. of Pharmacy and Health Sciences', 'Franklin and Marshall C.', 'Pacific U.', 'Washington and Lee U.', 'Saginaw Valley State U.', 'Western Carolina U.', 'Dickinson C.', "Saint Joseph's U.", 'Coastal Carolina U.', 'Wheaton C., Wheaton', 'U. North Carolina, Asheville', 'Youngstown State U.', 'SUNY, C. Brockport', 'Sewanee: U. of the South', 'Santa Clara U.', 'Lake Superior State U.', 'U. Tennessee, Knoxville, Institute of Agriculture', 'U. Maryland, Center for Environmental Science', 'Louisiana State U., Health Sciences Center \xe2\x80\x93 New Orleans', 'U. North Texas, Health Science Center', 'U. Puerto Rico, Medical Sciences Campus', 'Eastern Virginia Medical School', 'Van Andel Institute', 'Texas Tech U., Health Sciences Center', 'Morehouse School of Medicine', 'Mercer U.', 'SUNY, Upstate Medical U.', 'SUNY, Downstate Medical Center', 'Loma Linda U.', 'Catholic U. of America', 'Louisiana State U., Health Sciences Center - Shreveport', 'Texas A&M U.-Kingsville', 'U. of the Virgin Islands', 'Texas Tech U., Health Sciences Center, El Paso', 'Albany Medical C.', 'New York Medical C.', 'Northeast Ohio Medical U.', 'Rosalind Franklin U. of Medicine and Science', 'Charles R. Drew U. of Medicine and Science', 'Meharry Medical C.', 'North Carolina Central U.', 'U. Texas Health Science Center, Tyler', 'Montana Tech of U. Montana', 'Humboldt State U.', 'Rhode Island C.', 'Tarleton State U.', 'U. Maryland, Eastern Shore', "St. Edward's U.", 'Dillard U.', 'Midwestern U.', 'Langston U.', 'Ponce Health Sciences U.', 'U. New England', 'U. South Florida, Saint Petersburg', 'Seton Hall U.', 'Clark U.', 'U. Central del Caribe', 'U. Guam', 'National Defense U.', 'Alfred U.', 'U. Massachusetts, central office', 'California State U., Los Angeles', 'Western U. of Health Sciences', 'Southern U. 
and A&M C., Agricultural Research and Extension Center', 'A. T. Still U.', 'Milwaukee School of Engineering', 'U. Oklahoma, Tulsa', 'U. of the Sciences Philadelphia', 'MGH Institute of Health Professions', 'SUNY, C. of Optometry', 'Roseman U. of Health Sciences', 'Mills C.', 'Touro U., Vallejo', 'Memorial Sloan Kettering Cancer Center, Louis V. Gerstner Jr. Graduate S. of Biomedical Sciences', 'Fuller Theological Seminary', 'Eastern Washington U.', 'Plymouth State U.', 'Tougaloo C.', 'Southeastern Louisiana U.', 'Naval War C.', 'Central Washington U.', 'Philadelphia C. of Osteopathic Medicine', 'Northwest Indian C.', 'Black Hills State U.', 'Erikson Institute', 'SUNY, Buffalo State', 'National U.', 'Alabama State U.', 'Edward Via C. of Osteopathic Medicine', 'Pittsburg State U.', 'Sul Ross State U.', 'Commonwealth Medical C.', 'Franklin W. Olin C. of Engineering', 'Austin Peay State U.', 'U. Illinois, Springfield', 'Oregon Institute of Technology', 'Mercyhurst U.', 'Connecticut C.', 'Grambling State U.', 'Morehead State U.', 'U. New Haven', 'U. Texas, Permian Basin', 'Keck Graduate Institute', 'Wheeling Jesuit U.', 'Palmer C. of Chiropractic, Davenport', 'Eastern Kentucky U.', 'U. Louisiana, Monroe', 'Oklahoma State U., Center for Health Sciences', 'U. South Florida, Sarasota-Manatee', 'Nicholls State U.', 'U. Alaska, Southeast', 'Occidental C.', 'California Maritime Academy', 'Indiana State U.', 'U. Puerto Rico, Cayey', 'Maine Maritime Academy', 'Marshall B. Ketchum U.', 'Stockton U.', 'U. Tampa', 'Bastyr U.', 'Albany State U.', 'American Samoa Community C.', 'Providence C.', 'Emerson C.', 'Salus U.', 'Alaska Pacific U.', 'Christopher Newport U.', 'Augsburg C.', 'Salish Kootenai C.', 'U. del Este', 'La Sierra U.', 'SUNY, C. Plattsburgh', 'Heidelberg U.', 'SUNY, Farmingdale State C.', 'Seattle Pacific U.', 'California State U., Stanislaus', 'New England C. of Optometry', 'U. Western States', 'U. 
Houston system administration', 'Keene State C.', 'Hobart and William Smith Colleges', 'U. of the Incarnate Word', 'U. Puerto Rico, Humacao', 'Augustana C., Sioux Falls', 'Barry U.', 'CUNY, Advanced Science Research Center', 'U. Redlands', 'Doane C.', 'Florida Polytechnic U.'], 2: ['Carnegie Mellon U.', 'U. Texas, Austin', 'U. Maryland, College Park', 'Brown U.', 'U. Central Florida', 'U. Tennessee, Knoxville', 'U. Hawaii, Manoa', 'U. Massachusetts, Amherst', 'Arizona State U.', 'Princeton U.', 'Iowa State U.', 'SUNY, Stony Brook U.', 'U. California, Irvine', 'Rice U.', 'Florida International U.', 'U. Nebraska, Lincoln', 'Rensselaer Polytechnic Institute', 'U. Notre Dame', 'U. California, Santa Barbara', 'Mississippi State U.', 'Florida State U.', 'New Jersey Institute of Technology', 'California Institute of Technology', 'U. Colorado Boulder', 'Oregon State U.', 'Clemson U.', 'U. Houston', 'George Washington U.', 'SUNY, U. Albany', 'Texas Tech U.', 'Colorado State U., Fort Collins', 'U. Delaware', 'Washington State U.', 'Drexel U.', 'Kansas State U.', 'Tufts U.', 'U. New Mexico', 'Dartmouth C.', 'U. California, Riverside', 'Louisiana State U., Baton Rouge', 'U. Connecticut', 'Temple U.', 'North Dakota State U.', 'U. Missouri, Columbia', 'Oklahoma State U., Stillwater', 'Wayne State U.', 'U. California, Santa Cruz', 'U. South Carolina, Columbia', 'U. Kansas', 'Utah State U.', 'U. Idaho', 'Georgetown U.', 'U. Dayton', 'New Mexico State U.', 'Tulane U.', 'U. Oklahoma, Norman and Health Science Center', 'Virginia Commonwealth U.', 'West Virginia U.', 'U. Vermont', 'U. Louisville', 'U. Mississippi', 'Auburn U., Auburn', 'Wake Forest U.', 'U. Arkansas, Fayetteville', 'Woods Hole Oceanographic Institution', 'Medical C. Wisconsin', 'U. Texas Medical Branch', 'U. Texas Health Science Center, San Antonio', 'U. Nebraska, Medical Center', 'U. 
Arkansas for Medical Sciences', 'Thomas Jefferson U.', 'Cold Spring Harbor Laboratory', 'Rush U.', 'Georgia Regents U.\t\t\t\t\t\t\t\t\t', 'U. Tennessee, Health Science Center'], 3: ['U. Southern California', 'U. Utah', 'Indiana U., Bloomington', 'U. Chicago', 'Rutgers, State U. New Jersey, New Brunswick', 'New York U.', 'U. Illinois, Chicago', 'SUNY, U. Buffalo', 'U. Georgia', 'Rockefeller U.', 'U. South Florida, Tampa', 'Boston U.', 'U. Virginia, Charlottesville', 'U. Rochester', 'U. Kentucky', 'U. Iowa', 'U. Miami', 'U. Cincinnati', 'Case Western Reserve U.', 'U. Colorado Denver and Anschutz Medical Campus', 'U. Maryland, Baltimore', 'Scripps Research Institute', 'Uniformed Services U. of the Health Sciences', 'Oregon Health and Science U.', 'Yeshiva U.', 'U. Massachusetts, Medical School', 'Medical U. South Carolina', 'U. Texas Health Science Center, Houston'], 4: ['U. Illinois, Urbana-Champaign', 'Georgia Institute of Technology', 'Massachusetts Institute of Technology', 'Pennsylvania State U., University Park and Hershey Medical Center', 'North Carolina State U.', 'Virginia Polytechnic Institute and State U.', 'Purdue U., West Lafayette', 'Texas A&M U., College Station and Health Science Center', 'Michigan State U.', 'U. California, Berkeley', 'U. Arizona', 'SUNY, Polytechnic Institute'], 5: ['U. California, San Diego', 'U. Minnesota, Twin Cities', 'U. Wisconsin-Madison', 'Ohio State U.', 'Stanford U.', 'U. California, Los Angeles', 'Columbia U. in the City of New York', 'U. Michigan, Ann Arbor', 'U. North Carolina, Chapel Hill', 'U. Washington, Seattle', 'Cornell U.', 'Duke U.', 'U. Texas M. D. Anderson Cancer Center', 'U. Pennsylvania', 'Harvard U.', 'U. California, Davis', 'Yale U.', 'U. Florida', 'U. Pittsburgh, Pittsburgh', 'Washington U., Saint Louis', 'Northwestern U.', 'Emory U.', 'Vanderbilt U.', 'U. Alabama, Birmingham', 'U. California, San Francisco', 'Baylor C. of Medicine', 'Icahn School of Medicine at Mt. Sinai', 'U. 
Texas Southwestern Medical Center'], 6: ['Johns Hopkins U.']}
for i in cluster_dict[6]:
print str(i)+";",
Johns Hopkins U.;
for i in cluster_dict:
print i, len(cluster_dict[i])
1 496 2 75 3 28 4 12 5 28 6 1