import os, getpass os.environ['MPDS_KEY'] = getpass.getpass() !pip install mpds_client from __future__ import division import time import random import threading from mpds_client import MPDSDataRetrieval, MPDSDataTypes ml_data = { 'isothermal bulk modulus': {'bounds': [5, 265], 'units': 'GPa'}, 'enthalpy of formation': {'bounds': [-325, 0], 'units': 'kJ g-at.-1'}, 'heat capacity at constant pressure': {'bounds': [11, 28], 'units': 'J K-1 g-at.-1'}, 'Seebeck coefficient': {'bounds': [-150, 225], 'units': 'muV K-1'}, 'values of electronic band gap': {'bounds': [0.5, 10], 'units': 'eV'}, # NB both direct & indirect 'temperature for congruent melting': {'bounds': [300, 2700], 'units': 'K'}, 'Debye temperature': {'bounds': [175, 1100], 'units': 'K'}, 'linear thermal expansion coefficient': {'bounds': [1.0E-06, 9.5E-05], 'units': 'K-1'} } bound_tolerance_factor = 15 extremes, extremes_intersects = {}, {} def mpds_download_worker(prop, min_bound, max_bound): ''' A parallelizable worker ''' print("---Starting with %s" % prop) client = MPDSDataRetrieval(dtype=MPDSDataTypes.MACHINE_LEARNING) min_entries, max_entries = [], [] for item in client.get_data({"props": prop}, fields={'P':[ 'sample.material.entry', 'sample.material.phase_id', 'sample.material.chemical_formula', 'sample.measurement[0].property.scalar' ]}): if item[3] < min_bound: min_entries.append(item) elif item[3] > max_bound: max_entries.append(item) for item in list(min_entries) + list(max_entries): keep_info = [prop, item[0]] + item[2:] if item[1] in extremes: extremes_intersects.setdefault(item[1], []).append(keep_info) else: extremes[item[1]] = keep_info start_time = time.time() threads = [] ml_props = list(ml_data.keys()) for even, odd in zip(ml_props[0::2], ml_props[1::2]): print("---Preparing a pair of %s & %s" % (even, odd)) for key in [even, odd]: # adjust bounds to match entries near the margin margin = (ml_data[key]['bounds'][1] - ml_data[key]['bounds'][0]) / bound_tolerance_factor ml_data[key]['bounds'] = [ml_data[key]['bounds'][0] + margin, ml_data[key]['bounds'][1] - margin] # run in parallel thread = threading.Thread(target=mpds_download_worker, args=[key] + ml_data[key]['bounds']) thread.start() threads.append(thread) for thread in threads: thread.join() for phase_id in extremes_intersects: extremes_intersects[phase_id].append(extremes[phase_id]) for phase_id in sorted(extremes_intersects.keys()): print("*" * 30 + " Distinct phase https://mpds.io/#phase_id/%s " % phase_id + "*" * 30) for card in extremes_intersects[phase_id]: print("%s (%s) %s = %s %s" % ( card[2], card[1], card[0], card[3], ml_data[card[0]]['units'] )) print("Done in %1.2f sc" % (time.time() - start_time))