#!/usr/bin/env python
# coding: utf-8

# # 1.1 Getting started
#
# ## Prerequisites
#
# ### Installation
#
# This tutorial requires **signac**, so make sure to install the package before starting.
# The easiest way to do so is using conda:
#
# ```$ conda config --add channels conda-forge```
#
# ```$ conda install signac```
#
# or pip:
#
# ```pip install signac --user```
#
#
# Please refer to the [documentation](https://docs.signac.io/en/latest/installation.html#installation) for detailed instructions on how to install signac.
#
# After successful installation, the following cell should execute without error:

# In[1]:


import signac


# We start by removing all data which might be left-over from previous executions of this tutorial.

# In[2]:


import shutil

# Portable stand-in for the notebook's `%rm -rf` magic, so that this script also
# runs under a plain Python interpreter where `get_ipython()` is not defined.
# `ignore_errors=True` keeps a missing workspace from being an error.
shutil.rmtree("projects/tutorial/workspace", ignore_errors=True)


# ## A minimal example
#
# For this tutorial we want to compute the volume of an ideal gas as a function of its pressure and thermal energy using the ideal gas equation
#
# $p V = N kT$, where
#
# $N$ refers to the system size, $p$ to the pressure, $kT$ to the thermal energy and $V$ is the volume of the system.

# In[3]:


def V_idg(N, kT, p):
    """Return the volume of an ideal gas, solving ``p V = N kT`` for ``V``.

    Args:
        N: The system size.
        kT: The thermal energy.
        p: The pressure.

    Returns:
        The volume ``N * kT / p``.

    Raises:
        ZeroDivisionError: If ``p`` is zero (for plain Python numbers).
    """
    return N * kT / p


# We can execute the complete study in just a few lines of code.
# First, we initialize the project directory and get a project handle:

# In[4]:


import signac

project = signac.init_project(name="TutorialProject", root="projects/tutorial")


# We iterate over the variable of interest *p* and construct a complete state point *sp* which contains all the meta data associated with our data.
# In this simple example the meta data is very compact, but in principle the state point may be highly complex.
#
# Next, we obtain a *job* handle and store the result of the calculation within the *job document*.
# The *job document* is a persistent dictionary for storage of simple key-value pairs.
# Here, we exploit that the state point dictionary *sp* can easily be passed into the `V_idg()` function using the [keyword expansion syntax](https://docs.python.org/dev/tutorial/controlflow.html#keyword-arguments) (`**sp`).

# In[5]:


for p in 0.1, 1.0, 10.0:
    sp = {"p": p, "kT": 1.0, "N": 1000}
    job = project.open_job(sp)
    job.document["V"] = V_idg(**sp)


# We can then examine our results by iterating over the data space:

# In[6]:


for job in project:
    print(job.sp.p, job.document["V"])


# That's it.
#
# ...
#
# Ok, there's more...
# Let's have a closer look at the individual components.
#
# ## The Basics
#
# The **signac** data management framework assists the user in managing the data space of individual *projects*.
# All data related to one or multiple projects is stored in a *workspace*, which by default is a directory called `workspace` within the project's root directory.

# In[7]:


print(project.root_directory())
print(project.workspace())


# The core idea is to tightly couple state points, unique sets of parameters, with their associated data.
# In general, the parameter space needs to contain all parameters that will affect our data.
#
# For the ideal gas that is a 3-dimensional space spanned by the thermal energy *kT*, the pressure *p* and the system size *N*.
# These are the **input parameters** for our calculations, while the calculated volume *V* is the **output data**.
# In terms of **signac** this relationship is represented by an instance of `Job`.
#
# We use the `open_job()` method to get a *job handle* for a specific set of input parameters.

# In[8]:


job = project.open_job({"p": 1.0, "kT": 1.0, "N": 1000})


# The *job* handle tightly couples our input parameters (*p*, *kT*, *N*) with the storage location of the output data.
# You can inspect both the input parameters and the storage location explicitly:

# In[9]:


print(job.statepoint())
print(job.workspace())


# For convenience, a job's *state point* may also be accessed via the short-hand `sp` attribute.
# For example, to access the pressure value `p` we can use either of the two following expressions:

# In[10]:


print(job.statepoint()["p"])
print(job.sp.p)


# Each *job* has a **unique id** representing the state point.
# This means opening a job with the exact same input parameters is guaranteed to have the **exact same id**.

# In[11]:


job2 = project.open_job({"kT": 1.0, "N": 1000, "p": 1.0})
print(job.id, job2.id)


# The *job id* is used to uniquely identify data associated with a specific state point.
# Think of the *job* as a container that is used to store all data associated with the state point.
# For example, it should be safe to assume that all files that are stored within the job's workspace directory are tightly coupled to the job's statepoint.

# In[12]:


print(job.workspace())


# Let's store the volume calculated for each state point in a file called `V.txt` within the job's workspace.

# In[13]:


import os

fn_out = os.path.join(job.workspace(), "V.txt")
with open(fn_out, "w") as file:
    V = V_idg(**job.statepoint())
    file.write(str(V) + "\n")


# Because this is such a common pattern, **signac** allows you to short-cut this with the `job.fn()` method.

# In[14]:


with open(job.fn("V.txt"), "w") as file:
    V = V_idg(**job.statepoint())
    file.write(str(V) + "\n")


# Sometimes it is easier to temporarily switch the *current working directory* while storing data for a specific job.
# For this purpose, we can use the `Job` object as [context manager](https://docs.python.org/3/reference/compound_stmts.html#with).
# This means that we switch into the workspace directory associated with the job after entering, and switch back into the original working directory after exiting.

# In[15]:


with job:
    # `V` still holds the volume computed in the previous cell.
    with open("V.txt", "w") as file:
        file.write(str(V) + "\n")


# Another alternative to store light-weight data is the *job document* as shown in the minimal example.
# The *job document* is a persistent JSON storage file for simple key-value pairs.

# In[16]:


job.document["V"] = V_idg(**job.statepoint())
print(job.statepoint(), job.document)


# Since we are usually interested in more than one state point, the standard operation is to iterate over all variable(s) of interest, construct the full state point, get the associated job handle, and then either just initialize the job or perform the full operation.

# In[17]:


for pressure in 0.1, 1.0, 10.0:
    statepoint = {"p": pressure, "kT": 1.0, "N": 1000}
    job = project.open_job(statepoint)
    job.document["V"] = V_idg(**job.statepoint())


# Let's verify our result by inspecting the data.

# In[18]:


for job in project:
    print(job.statepoint(), job.document)


# Those are the basics for using **signac** for data management.
# The [next section](signac_102_Exploring_Data.ipynb) demonstrates how to explore an existing data space.