"""Rank AGU Fall Meeting 2022 sessions by how many abstracts each one lists.

The script pages through the Confex meeting search API to collect session
ids and titles, asks the API for the paper list of every session, and prints
a grid table of sessions ordered by abstract count (highest first) together
with a dense rank (sessions with equal counts share a rank).
"""
import collections
import json
import pprint

import requests
import tabulate

# Search endpoint; ``{page}`` is the 1-based result page.
SEARCH_URL = (
    'https://agu.confex.com/agu/fm22/meetingapi.cgi/Search/0'
    '?date=2022-08-10T15%3A56%3A36&configtype=meetingapp_prelim'
    '&sort=Relevance&size=10&page={page}&ModelType=Session'
)
# Paper-list endpoint for a single session.
PAPERS_URL = (
    'https://agu.confex.com/agu/fm22/meetingapi.cgi/Session/'
    '{session_id}/ChildList_Paper'
)

PAGE_SIZE = 10                 # matches ``size=10`` in SEARCH_URL
PAGE_COUNT = 94                # pages 1..94 are requested
EXPECTED_SESSION_COUNT = 938   # only used in the progress message
REQUEST_TIMEOUT = 30           # seconds; without it a stalled socket hangs forever


def _get_json(http, url):
    """Fetch *url* with *http* and return the decoded JSON payload.

    Raises ``requests.HTTPError`` on a 4xx/5xx answer so that an error page
    is reported as such instead of as a JSON/KeyError further down.
    """
    response = http.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def fetch_sessions(http):
    """Return ``[{'id': ..., 'title': ...}, ...]`` for every search hit."""
    sessions = []
    for page in range(1, PAGE_COUNT + 1):
        data = _get_json(http, SEARCH_URL.format(page=page))
        sessions.extend(
            {'id': hit['id'], 'title': hit['Title']}
            for hit in data['ChildList_Hits']
        )
        if page % 10 == 0:
            print(
                f'Retrieved {page * PAGE_SIZE} of '
                f'{EXPECTED_SESSION_COUNT} sessions.'
            )
    return sessions


def fetch_abstract_counts(http, sessions):
    """Return one record per entry of *sessions* with its abstract count.

    The count is the length of the JSON value returned by the paper-list
    endpoint (presumably a list of papers -- verify against the API).
    """
    abstracts = []
    for done, session in enumerate(sessions, start=1):
        papers = _get_json(
            http, PAPERS_URL.format(session_id=session['id'])
        )
        abstracts.append({
            'session_id': session['id'],
            'session_title': session['title'],
            'num_abstract': len(papers),
        })
        if done % 50 == 0:
            print(f'Retrieved {done} of {len(sessions)} sessions')
    return abstracts


def rank_sessions(abstracts):
    """Sort, de-duplicate and dense-rank abstract-count records.

    Records are ordered by ``num_abstract`` descending (stable for ties).
    Exact duplicates (same id, title and count) are dropped, keeping the
    first occurrence. The rank starts at 1 and increases by one each time
    the count changes, so equal counts share a rank.

    Returns a list of ``OrderedDict`` with the keys ``session_id``,
    ``num_abstract``, ``rank`` and ``session_title``, in that order.
    """
    ordered = sorted(
        abstracts, key=lambda rec: rec['num_abstract'], reverse=True
    )
    seen = set()
    ranked = []
    rank = 0
    previous_count = None  # sentinel: the first record always opens rank 1
    for rec in ordered:
        identity = (
            rec['session_id'], rec['session_title'], rec['num_abstract']
        )
        if identity in seen:
            continue
        seen.add(identity)
        if rec['num_abstract'] != previous_count:
            previous_count = rec['num_abstract']
            rank += 1
        ranked.append(collections.OrderedDict([
            ('session_id', rec['session_id']),
            ('num_abstract', rec['num_abstract']),
            ('rank', rank),
            ('session_title', rec['session_title']),
        ]))
    return ranked


def main():
    """Download the session data and print the ranking table."""
    # One Session reuses the underlying connection across ~1000 requests.
    with requests.Session() as http:
        sessions = fetch_sessions(http)
        abstracts = fetch_abstract_counts(http, sessions)

    ranked = rank_sessions(abstracts)
    if not ranked:
        # Indexing ranked[0] for the header would raise IndexError here.
        print('No sessions retrieved.')
        return

    header = list(ranked[0].keys())
    rows = [list(entry.values()) for entry in ranked]
    print(tabulate.tabulate(rows, header, tablefmt='grid'))


if __name__ == '__main__':
    main()