#!/usr/bin/env python
# coding: utf-8

# # Spreadsheet Generation Report
#
# ### 12th August 2014 Neil D. Lawrence
#
# This notebook contains scripts for creating spreadsheets that summarize the
# latest state of reviews. The script goes through the calibrated reviews (as
# generated by [this notebook](./Reviewer Calibration.ipynb)) and seeks papers
# which seem problematic in terms of either the span of the review scores, the
# confidence of the reviewers, the length of the reviews or those that are in
# the 'grey area' for publication. For each paper it creates a set of
# comments, which is then emailed out to area chairs.

# In[1]:

import os
import pickle

import pandas as pd

import cmtutils.cmtutils as cu

date = '2014-08-12'

# Key of the Google doc that stores the buddy pairs. It is defined up front
# because the grouping call below needs it; previously it was only assigned
# further down, so running the script top to bottom raised a NameError.
buddy_pair_key = '19nxaglIIzJsPuF54matL2JVIvLrkYPrSPiwPYp_sXTE'

# Firstly we read in the processed reviews. The processing includes the
# calibrated review scores and the probability of accept.

# In[ ]:

groups = cu.pc_groupings(
    buddy_pair_key,
    '2014-08-12_conflicts.tsv',
    assignment_file='2014-08-12_area_chair_assignments.xml',
)

# Create a report from the reviews and generate inside the structure the
# comments ready for depositing on spreadsheets.

# In[5]:

report = cu.review_report(filename='2014-08-12_processed_reviews.csv')
report.spreadsheet_comments()

# Now load in the area chair assignments.
# NOTE(review): this file and the conflicts file below are dated 2014-08-11,
# while the files above are dated 2014-08-12 -- confirm this is intentional.

# In[9]:

a = cu.assignment()
a.load_assignment(
    filename='2014-08-11_area_chair_assignments.xml',
    reviewer_type='metareviewer',
)

# And read in conflicts to ensure we don't show area chairs papers for which
# they are conflicted.

# In[10]:

# Read from the TSV format CMT provide: the first field of each row is the
# paper, the remaining fields are the reviewers conflicted with it.
filename = '2014-08-11_conflicts.tsv'
conflicts_groups = {}
with open(os.path.join(cu.cmt_data_directory, filename)) as fin:
    for line in fin:
        stripped = line.strip()
        if not stripped:
            # A blank line would otherwise create a bogus '' paper entry.
            continue
        row = stripped.split('\t')
        conflicts_groups[row[0]] = row[1:]

# Invert the mapping: reviewer -> list of papers they are conflicted with.
conflicts_by_reviewer = {}
for paper, conflicted_reviewers in conflicts_groups.items():
    for reviewer in conflicted_reviewers:
        conflicts_by_reviewer.setdefault(reviewer, []).append(paper)

# We've stored the buddy pairs in a google doc, load them in.

# In[11]:

bp = cu.google_doc(spreadsheet_key=buddy_pair_key)
buddy_pairs = bp.read()

# We want the area chairs' first and last names for the spreadsheets. Load in
# the reviewer data base where these are stored.

# In[12]:

db = cu.reviewerdb('reviewers.db')
reviewers = db.to_data_frame()

# Create google spreadsheets for each buddy pair, and a separate spreadsheet
# for the reviewer of the conflicted papers.

# In[14]:

program_chairs = ['n.lawrence@sheffield.ac.uk',
                  'corinnanips@gmail.com',
                  'alan.saul@sheffield.ac.uk']

# Note attached to the shared buddy-pair sheet (text is emitted verbatim).
pair_comment = (
    "Click Me for Notes! Based on processed reviews form 2014/8/12. "
    "This report gives the status of the papers that don't conflict within "
    "your buddy-pair. Please use it to identify papers where there may be "
    "ongoing problems. Look out for papers with a high attention score and "
    "little or no discussion. Your notes can be placed in the 'note' column. "
    "Tentative accept/talk/spotlight decisions can be made by placing a 'y' "
    "for yes or 'm' for maybe in the relevant column."
)
# Note attached to each chair's individual sheet of conflicted papers.
conflict_comment = (
    "These are papers that conflict with your buddy pair, they will need to "
    "be dealt with separately. Based on processed reviews form 2014/8/12."
)

spreadsheet_keys = {}
conflict_list = {}
num_papers = 0
paper_list = {}
paper_conflict = 0
sort_order = ['prob_accept', 'attention_score']

assigned_papers = a.assignment_reviewer['metareviewer']

for pair in sorted(set(buddy_pairs.index), key=int):
    pair_df = buddy_pairs.loc[pair]

    # A paper is withheld from the shared sheet if *either* chair of the
    # pair is conflicted with it. This set is the same for every buddy in
    # the pair, so build it once. Chairs with no listed conflicts
    # contribute nothing instead of raising KeyError.
    conflict_papers = set()
    for chair in pair_df['area_chair']:
        conflict_papers.update(conflicts_by_reviewer.get(chair, []))
        conflict_list[chair] = []

    # Split each chair's assigned papers into shared (no conflict within
    # the pair) and conflicted; conflicted papers are filed under the chair
    # they are actually assigned to.
    paper_list[pair] = []
    for _, buddy in pair_df.iterrows():
        chair = buddy['area_chair']
        for paper in assigned_papers[chair]:
            if paper in conflict_papers:
                conflict_list[chair].append(paper)
            else:
                paper_list[pair].append(paper)

    num_papers += len(paper_list[pair])
    print(pair, len(paper_list[pair]), 'papers')

    # Look up "FirstName LastName" for every chair in the pair.
    buddy_name = {}
    for _, buddy in pair_df.iterrows():
        email = buddy['area_chair']
        matches = reviewers.index[reviewers['Email'] == email]
        reviewer_row = reviewers.loc[matches[0]]
        buddy_name[email] = (reviewer_row['FirstName'] + ' '
                             + reviewer_row['LastName'])
        # Label taken from the last row of the pair, as before.
        pair_label = buddy['pair']

    # Shared sheet for the whole buddy pair.
    ds = cu.google_doc(title="Review Summary Sheet: " + pair_label + '---'
                       + ', '.join(buddy_name.values()))
    shared_report = report.attention_report.loc[paper_list[pair]]
    ds.write(shared_report.sort_values(by=sort_order, ascending=False),
             comment=pair_comment)
    # list(...) because a dict view cannot be concatenated with a list.
    ds.share(users=list(buddy_name) + program_chairs,
             send_notifications=True)
    spreadsheet_keys[pair_label] = ds.spreadsheet_key

    # Individual sheets for the papers each chair cannot share with the pair.
    for _, buddy in pair_df.iterrows():
        email = buddy['area_chair']
        personal_papers = list(set(assigned_papers[email])
                               - set(paper_list[pair]))
        if personal_papers:
            ds = cu.google_doc(title="Review Summary Sheet: "
                               + buddy_name[email])
            personal_report = report.attention_report.loc[personal_papers]
            ds.write(personal_report.sort_values(by=sort_order,
                                                 ascending=False),
                     comment=conflict_comment)
            ds.share(users=[email] + program_chairs,
                     send_notifications=True)
            spreadsheet_keys[email] = ds.spreadsheet_key

print(num_papers, 'total papers without conflicts')

# Save the spreadsheet keys (these will be vital for updating the
# spreadsheets with new review scores at a later date!).

# In[17]:

keys_path = os.path.join(cu.cmt_data_directory, "spreadsheet_keys.pickle")
with open(keys_path, "wb") as fout:
    pickle.dump(spreadsheet_keys, fout)

# In[19]:

# Read the keys back. The pickle is the one written just above; never point
# this at a pickle from an untrusted source.
with open(keys_path, "rb") as fin:
    load_keys = pickle.load(fin)