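This notebook documents the process of preparing crystal structures from a ZIP archive of CIF files for submission to the Materials Project (MP).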
We first need a filename for the ZIP archive.
# Path to the ZIP archive of CIF files. The value is machine-specific:
# edit it to point at your own archive before running the cells below.
zipfilename = '/Users/dwinston/Dropbox/best/structures/ever.zip'
Let's create a list of structures from the ZIP archive's CIF files. Anything invalid about the ZIP archive or CIF files will raise an exception here.
from zipfile import ZipFile
from pymatgen.io.cif import CifParser
# Parse every member of the ZIP archive with CifParser and collect all of the
# resulting structures into one flat list.
structures = []
# Open the archive in a `with` block so its file handle is always released,
# including when a member fails to parse and an exception propagates.
with ZipFile(zipfilename, 'r') as myzip:
    for name in myzip.namelist():
        with myzip.open(name) as cif_file:
            # One CIF file may yield several structures, hence extend().
            structures.extend(CifParser(cif_file).get_structures())
# Notebook cell output: number of structures parsed.
len(structures)
Reject any structures that are already on the Materials Project (MP) web site.
from pymatgen import MPRester
# Partition the parsed structures: whatever `find_structure` reports for a
# structure is accumulated in `mp_ids`; structures with no match at all are
# kept in `new_structures`.
mpr = MPRester()
mp_ids = []
new_structures = []
for candidate in structures:
    matches = mpr.find_structure(candidate)
    if len(matches) == 0:
        # Nothing matched, so this structure is kept.
        new_structures.append(candidate)
        continue
    mp_ids.extend(matches)
if mp_ids:
    print("Filtered out structures already on MP: {}".format(mp_ids))
# Notebook cell output: number of structures that remain.
len(new_structures)
Create a mock "job" for each structure, and then simulate the checks the submission processor does to reject jobs. The structures that pass here will actually spawn a ready workflow, so we will filter for such structures.
from pymatgen import Composition
from pymatgen.matproj.snl import StructureNL
def get_meta_from_structure(structure):
    """Build the metadata fields that "fill out" a job document.

    Used by `structure_to_mock_job`. Returns a dict with the site count,
    the sorted element symbols (and how many there are), several formula
    variants, the chemical system string, and the `is_ordered` /
    `is_valid` flags of `structure`.
    """
    composition = structure.composition
    # De-duplicate the element symbols, then sort them for a stable order.
    symbols = sorted({element.symbol for element in composition.elements})

    meta = {}
    meta['nsites'] = len(structure)
    meta['elements'] = symbols
    meta['nelements'] = len(symbols)
    meta['formula'] = composition.formula
    meta['reduced_cell_formula'] = composition.reduced_formula
    # Alphabetical formula of a composition rebuilt from the reduced formula.
    reduced = Composition(composition.reduced_formula)
    meta['reduced_cell_formula_abc'] = reduced.alphabetical_formula
    meta['anonymized_formula'] = composition.anonymized_formula
    meta['chemsystem'] = '-'.join(symbols)
    meta['is_ordered'] = structure.is_ordered
    meta['is_valid'] = structure.is_valid()
    return meta
def structure_to_mock_job(structure):
    """Wrap `structure` in a StructureNL and return a mock job dict.

    The returned dict is the SNL's dict form, augmented with the metadata
    from `get_meta_from_structure` (only when 'is_valid' is not already
    present) and then updated with the dict form of the sorted structure.
    """
    # An SNL needs at least one author; for a mock job any placeholder works.
    placeholder_authors = [{"name": "Evgraf Fedorov", "email": "symmetry@ftw.org"}]
    snl = StructureNL(structure, placeholder_authors)

    job = snl.as_dict()
    metadata_missing = 'is_valid' not in job
    if metadata_missing:
        job.update(get_meta_from_structure(snl.structure))

    job.update(snl.structure.get_sorted_structure().as_dict())
    return job
# Limits used by the checks below. Per the original note, they follow
# mpworks.processors.process_submissions.SubmissionProcessor#submit_new_workflow.

# Same value as SubmissionProcessor.MAX_SITES (referenced above).
MAX_SITES = 200

# Inlined here instead of `from mpworks.workflows.wf_utils import NO_POTCARS`.
NO_POTCARS = [
    'Po', 'At', 'Rn', 'Fr', 'Ra',
    'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr',
]
def job_is_submittable(job):
    """Return True if the mock `job` passes all rejection checks.

    `job` is a dict that `StructureNL.from_dict` can load and that also
    carries the 'is_valid', 'elements' and 'is_ordered' keys. The checks
    run in order and the first failure wins:

    1. more than MAX_SITES sites,
    2. `job['is_valid']` is falsy,
    3. any element of `job['elements']` is listed in NO_POTCARS,
    4. `job['is_ordered']` is falsy.

    On rejection, a 'REJECTED WORKFLOW FOR ...' line is printed and False
    is returned.
    """
    snl = StructureNL.from_dict(job)
    site_count = len(snl.structure.sites)

    if site_count > MAX_SITES:
        reason = 'too many sites ({})'.format(site_count)
    elif not job['is_valid']:
        reason = 'invalid structure (atoms too close)'
    elif set(NO_POTCARS) & set(job['elements']):
        reason = 'invalid element (No POTCAR)'
    elif not job['is_ordered']:
        reason = 'invalid structure (disordered)'
    else:
        return True

    # Parenthesized single-argument print: matches the print style used
    # elsewhere in this notebook and emits the same text whether `print`
    # is a statement (Python 2) or a function (Python 3).
    print('REJECTED WORKFLOW FOR {} - {}'.format(snl.structure.formula, reason))
    return False
# From this point on, `structures` refers to the structures that were not
# found on MP; the separate `new_structures` name is no longer needed.
structures = new_structures
# Keep only the structures whose mock job passes every submission check.
submittables = [
    s for s in structures
    if job_is_submittable(structure_to_mock_job(s))
]
If there are issues with the metadata, an exception will be raised on attempting to create `snl_list`.
# From this point on, `structures` refers to the submittable structures;
# the separate `submittables` name is no longer needed.
structures = submittables
# Authors to credit: a list of (name, email) pairs.
authors = [
('Evgraf Fedorov', 'symmetry@ftw.org'),
('Arthur Schoenflies', 'art@berlin.de'),
]
# References, as a single BibTeX string (one or more entries).
references = """
@article{Graf1961,
author = {Graf, Donald L},
journal = {American Mineralogist},
number = {11},
pages = {1283--1316},
title = {{Crystallographic tables for the rhombohedral carbonates}},
volume = {46},
year = {1961}
}
@article{Akao_1977,
author = {Akao, M and Iwai, S},
doi = {10.1107/s0567740877005834},
journal = {Acta Crystallogr Sect B},
month = {apr},
number = {4},
pages = {1273--1275},
publisher = {International Union of Crystallography ({\{}IUCr{\}})},
title = {{The hydrogen bonding of hydromagnesite}},
url = {http://dx.doi.org/10.1107/s0567740877005834},
volume = {33},
year = {1977}
}
"""
# Optional project names: a list of strings (empty here).
projects = []
# Optional remarks: a list of strings (empty here).
remarks = []
# Build the SNL objects from the structures with the shared metadata above.
# Per the notebook text, problems with the metadata raise an exception here.
snl_list = StructureNL.from_structures(structures, authors, references=references,
projects=projects, remarks=remarks)
# Create a client that targets the v1 REST endpoint explicitly; this rebinds
# the `mpr` name used earlier in the notebook.
mpr = MPRester(endpoint="https://www.materialsproject.org/rest/v1")
# The submission call is left commented out; uncomment it to actually
# submit `snl_list`.
#mpr.submit_snl(snl_list)