import json
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import stats
import sklearn
# IPython magic (not plain Python): draw matplotlib figures inline in the
# notebook output instead of opening a separate window.
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to every figure created afterwards.
plt.style.use('ggplot')
# Load the per-article topic data from disk. The path is relative to the
# notebook's working directory. The context manager closes the file as soon
# as parsing finishes.
with open("../json/filenames_topics_scores.json", 'r') as f:
    topics_articles = json.load(f)

# Notebook display: show the top-level keys of the loaded mapping.
topics_articles.keys()
[u'Neuroscience', u'Cell biology']
# Build a flat header list: two fixed leading entries followed by every
# topic entry of every article under every subject.
header = ['article', 'subject']

# topics_articles is walked as a two-level mapping (outer value -> inner
# mapping -> iterable). `.items()` is used instead of `.iteritems()` so the
# loop runs unchanged on both Python 2 and Python 3.
# Presumably the levels are subject -> publication id -> topics; verify
# against the JSON file.
for subject, articles in topics_articles.items():
    for pub_id, topics in articles.items():
        # Every element of `topics` is appended as-is; nothing is filtered
        # or de-duplicated.
        # NOTE(review): the recorded sample output shows scores and terms
        # interleaved in `header` -- confirm that is intended.
        header.extend(topics)

# Single-argument print(...) produces the same output under the Python 2
# print statement and the Python 3 print function.
print(len(header))
print(header[:20])
480 ['article', 'subject', 0.05900852995442989, u'mutant', 0.2085584692144273, u'reduced', 0.31061342923191704, u'amplitude', 0.1819439911921947, u'electrophysiological', 0.21618370087221447, u'mir9', 0.23960080551442975, u'aiv', 0.20781428583199402, u'vertes', 0.01683167050949445, u'class', 0.19853348284406702, u'conductance']