#!/usr/bin/env python # coding: utf-8 # # Example 1: Sandstone Model # In[1]: # Importing import theano.tensor as T import theano import sys, os sys.path.append("../GeMpy") sys.path.append("../") # Importing GeMpy modules import gempy as GeMpy # Reloading (only for development purposes) import importlib importlib.reload(GeMpy) # Usuful packages import numpy as np import pandas as pn import matplotlib.pyplot as plt # This was to choose the gpu os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # Default options of printin np.set_printoptions(precision = 6, linewidth= 130, suppress = True) #%matplotlib inline get_ipython().run_line_magic('matplotlib', 'inline') # In[3]: # Importing the data from csv files and settign extent and resolution geo_data = GeMpy.create_data([696000,747000,6863000,6950000,-20000, 200],[ 50, 50, 50], path_f = os.pardir+"/input_data/a_Foliations.csv", path_i = os.pardir+"/input_data/a_Points.csv") # Assigning series to formations as well as their order (timewise) GeMpy.set_data_series(geo_data, {"EarlyGranite_Series":geo_data.formations[-1], "BIF_Series":(geo_data.formations[0], geo_data.formations[1]), "SimpleMafic_Series":geo_data.formations[2]}, order_series = ["EarlyGranite_Series", "BIF_Series", "SimpleMafic_Series"], verbose=0) # In[4]: GeMpy.data_to_pickle(geo_data, 'sandstone') # In[5]: inter = GeMpy.InterpolatorInput(geo_data) # In[8]: inter.interpolator.tg.n_formation.get_value() # In[2]: import numpy as np np.zeros((100,0)) # In[5]: 100000/1000 # In[ ]: # In[3]: GeMpy.plot_data(geo_data) # In[3]: geo_data.formations # In[5]: di = GeMpy.InterpolatorInput(geo_data) # In[8]: di.data.get_formation_number() # In[4]: geo_data_s = GeMpy.select_series(geo_data, ['EarlyGranite_Series']) # In[4]: # Preprocessing data to interpolate: This rescales the coordinates between 0 and 1 for stability issues. # Here we can choose also the drift degree (in new updates I will change it to be possible to change the # grade after compilation). From here we can set also the data type of the operations in case you want to # use the GPU. Verbose is huge. There is a large list of strings that select what you want to print while # the computation. data_interp = GeMpy.set_interpolator(geo_data, dtype="float32", verbose=[]) # In[6]: # This cell will go to the backend # Set all the theano shared parameters and return the symbolic variables (the input of the theano function) input_data_T = data_interp.interpolator.tg.input_parameters_list() # Prepare the input data (interfaces, foliations data) to call the theano function. #Also set a few theano shared variables with the len of formations series and so on input_data_P = data_interp.interpolator.data_prep() # Compile the theano function. debugging = theano.function(input_data_T, data_interp.interpolator.tg.whole_block_model(), on_unused_input='ignore', allow_input_downcast=True, profile=True) # In[12]: get_ipython().run_cell_magic('timeit', '', '# Solve model calling the theano function\nsol = debugging(input_data_P[0], input_data_P[1], input_data_P[2], input_data_P[3],input_data_P[4], input_data_P[5])\n') # In[16]: lith = sol[-1,0,:] # In[17]: np.save('sandstone_lith', lith) # In[22]: a = geo_data.grid.grid[:,0].astype(bool) # In[12]: a2 = a.reshape(50,50,50) # In[16]: a2[:,:,0] # In[17]: geo_data.grid.grid # In[20]: 50*50 # In[18]: geo_data.data_to_pickle('sandstone') # In[21]: a2 = a1[:2500] # In[30]: g = geo_data.grid.grid h = geo_data.grid.grid[:2500] # In[33]: get_ipython().run_cell_magic('timeit', '', 'eu(g,h)\n') # In[11]: def squared_euclidean_distances(x_1, x_2): """ Compute the euclidian distances in 3D between all the points in x_1 and x_2 Args: x_1 (theano.tensor.matrix): shape n_points x number dimension x_2 (theano.tensor.matrix): shape n_points x number dimension Returns: theano.tensor.matrix: Distancse matrix. shape n_points x n_points """ # T.maximum avoid negative numbers increasing stability return sqd # In[24]: x_1 = T.matrix() x_2 = T.matrix() sqd = T.sqrt(T.maximum( (x_1**2).sum(1).reshape((x_1.shape[0], 1)) + (x_2**2).sum(1).reshape((1, x_2.shape[0])) - 2 * x_1.dot(x_2.T), 0 )) eu = theano.function([x_1, x_2], sqd) # In[22]: # In[16]: from evtk.hl import gridToVTK import numpy as np # Dimensions nx, ny, nz = 50, 50, 50 lx = geo_data.extent[0]-geo_data.extent[1] ly = geo_data.extent[2]-geo_data.extent[3] lz = geo_data.extent[4]-geo_data.extent[5] dx, dy, dz = lx/nx, ly/ny, lz/nz ncells = nx * ny * nz npoints = (nx + 1) * (ny + 1) * (nz + 1) # Coordinates x = np.arange(0, lx + 0.1*dx, dx, dtype='float64') y = np.arange(0, ly + 0.1*dy, dy, dtype='float64') z = np.arange(0, lz + 0.1*dz, dz, dtype='float64') x += geo_data.extent[0] y +=geo_data.extent[2] z +=geo_data.extent[5] # Variables litho = sol[-1,0,:].reshape( (nx, ny, nz)) gridToVTK("./sandstone", x, y, z, cellData = {"lithology" : litho},) # In[13]: geo_data.extent[4] # In[14]: # Plot the block model. GeMpy.plot_section(geo_data, 13, block = sol[-1,0,:], direction='x', plot_data = True) # In[ ]: # In[6]: geo_res = pn.read_csv('olaqases.vox') # In[7]: geo_res = geo_res.iloc[9:] # In[8]: geo_res['nx 50'].unique(), geo_data.formations # In[46]: ip_addresses = geo_data.interfaces["formation"].unique() ip_dict = dict(zip(ip_addresses, range(1, len(ip_addresses) + 1))) ip_dict['Murchison'] = 0 ip_dict['out'] = 0 ip_dict['SimpleMafic'] = 4 geo_res_num = geo_res['nx 50'].replace(ip_dict) # In[47]: geo_res_num # In[48]: ip_dict # In[49]: (geo_res_num.shape[0]), sol[-1,0,:].shape[0] # In[50]: sol[-1,0, :][7] # In[52]: geo_res_num: # In[53]: geo_res_num.as_matrix().astype(int) # In[76]: plt.imshow( geo_res_num.as_matrix().reshape(50, 50, 50)[:, 23, :], origin="bottom", cmap="viridis" ) # In[77]: plt.imshow( sol[-1,0,:].reshape(50, 50, 50)[:, 23, :].T, origin="bottom", cmap="viridis" ) # In[59]: # Plot the block model. GeMpy.plot_section(geo_data, 13, block = geo_res_num.as_matrix(), direction='y', plot_data = True) # In[57]: 50*50*50 # In[78]: np.unique(sol[-1,0,:]) # In[79]: # Formation number and formation data_interp.interfaces.groupby('formation number').formation.unique() # In[ ]: data_interp.interpolator.tg.u_grade_T.get_value() # In[ ]: np.unique(sol) # In[ ]: #np.save('SandstoneSol', sol) np.count_nonzero(np.load('SandstoneSol.npy') == sol) # In[ ]: sol.shape # In[12]: GeMpy.PlotData(geo_data).plot3D_steno(sol[-1,0,:], 'Sandstone', description='The sandstone model') # In[18]: np.linspace(geo_data.extent[0], geo_data.extent[1], geo_data.resolution[0], retstep=True) # In[8]: np.diff(np.linspace(geo_data.extent[0], geo_data.extent[1], geo_data.resolution[0], retstep=False)).shape # In[21]: (geo_data.extent[1]- geo_data.extent[0])/ geo_data.resolution[0]-4 # In[25]: (geo_data.extent[1]- geo_data.extent[0])/39 # In[12]: # So far this is a simple 3D visualization. I have to adapt it into GeMpy lith0 = sol == 0 lith1 = sol == 1 lith2 = sol == 2 lith3 = sol == 3 lith4 = sol == 4 np.unique(sol) import ipyvolume.pylab as p3 p3.figure(width=800) p3.scatter(geo_data.grid.grid[:,0][lith0], geo_data.grid.grid[:,1][lith0], geo_data.grid.grid[:,2][lith0], marker='box', color = 'blue', size = 0.1 ) p3.scatter(geo_data.grid.grid[:,0][lith1], geo_data.grid.grid[:,1][lith1], geo_data.grid.grid[:,2][lith1], marker='box', color = 'yellow', size = 1 ) p3.scatter(geo_data.grid.grid[:,0][lith2], geo_data.grid.grid[:,1][lith2], geo_data.grid.grid[:,2][lith2], marker='box', color = 'green', size = 1 ) p3.scatter(geo_data.grid.grid[:,0][lith3], geo_data.grid.grid[:,1][lith3], geo_data.grid.grid[:,2][lith3], marker='box', color = 'pink', size = 1 ) p3.scatter(geo_data.grid.grid[:,0][lith4], geo_data.grid.grid[:,1][lith4], geo_data.grid.grid[:,2][lith4], marker='box', color = 'red', size = 1 ) p3.xlim(np.min(geo_data.grid.grid[:,0]),np.min(geo_data.grid.grid[:,0])+2175.0*40) p3.ylim(np.min(geo_data.grid.grid[:,1]),np.max(geo_data.grid.grid[:,1])) p3.zlim(np.min(geo_data.grid.grid[:,2]),np.min(geo_data.grid.grid[:,2])+2175.0*40)#np.max(geo_data.grid.grid[:,2])) p3.show() # In[11]: # The profile at the moment sucks because all what is whithin a scan is not subdivided debugging.profile.summary() # #### Below here so far is deprecated # First we make a GeMpy instance with most of the parameters default (except range that is given by the project). Then we also fix the extension and the resolution of the domain we want to interpolate. Finally we compile the function, only needed once every time we open the project (the guys of theano they are working on letting loading compiled files, even though in our case it is not a big deal). # # *General note. So far the reescaling factor is calculated for all series at the same time. GeoModeller does it individually for every potential field. I have to look better what this parameter exactly means* # All input data is stored in pandas dataframes under, ```self.Data.Interances``` and ```self.Data.Foliations```: # In case of disconformities, we can define which formation belong to which series using a dictionary. Until python 3.6 is important to specify the order of the series otherwise is random # Now in the data frame we should have the series column too # Next step is the creating of a grid. So far only regular. By default it takes the extent and the resolution given in the `import_data` method. # In[6]: # Create a class Grid so far just regular grid #GeMpy.set_grid(geo_data) #GeMpy.get_grid(geo_data) # ## Plotting raw data # The object Plot is created automatically as we call the methods above. This object contains some methods to plot the data and the results. # # It is possible to plot a 2D projection of the data in a specific direction using the following method. Also is possible to choose the series you want to plot. Additionally all the key arguments of seaborn lmplot can be used. # In[7]: #GeMpy.plot_data(geo_data, 'y', geo_data.series.columns.values[1]) # ## Class Interpolator # This class will take the data from the class Data and calculate potential fields and block. We can pass as key arguments all the variables of the interpolation. I recommend not to touch them if you do not know what are you doing. The default values should be good enough. Also the first time we execute the method, we will compile the theano function so it can take a bit of time. # In[10]: get_ipython().run_line_magic('debug', '') # In[9]: geo_data.interpolator.results # In[10]: geo_data.interpolator.tg.c_o_T.get_value(), geo_data.interpolator.tg.a_T.get_value() # In[9]: geo_data.interpolator.compile_potential_field_function() # In[31]: geo_data.interpolator.compute_potential_fields('BIF_Series',verbose = 3) # In[18]: geo_data.interpolator.potential_fields # In[11]: geo_data.interpolator.results # In[41]: geo_data.interpolator.tg.c_resc.get_value() # Now we could visualize the individual potential fields as follow: # ### Early granite # In[ ]: GeMpy.plot_potential_field(geo_data,10, n_pf=0) # ### BIF Series # In[12]: GeMpy.plot_potential_field(geo_data,13, n_pf=1, cmap = "magma", plot_data = True, verbose = 5) # ### SImple mafic # In[13]: GeMpy.plot_potential_field(geo_data, 10, n_pf=2) # ## Optimizing the export of lithologies # # But usually the final result we want to get is the final block. The method `compute_block_model` will compute the block model, updating the attribute `block`. This attribute is a theano shared function that can return a 3D array (raveled) using the method `get_value()`. # In[7]: GeMpy.compute_block_model(geo_data) # In[ ]: #GeMpy.set_interpolator(geo_data, u_grade = 0, compute_potential_field=True) # And again after computing the model in the Plot object we can use the method `plot_block_section` to see a 2D section of the model # In[8]: GeMpy.plot_section(geo_data, 13, direction='y') # ## Export to vtk. (*Under development*) # In[14]: """Export model to VTK Export the geology blocks to VTK for visualisation of the entire 3-D model in an external VTK viewer, e.g. Paraview. ..Note:: Requires pyevtk, available for free on: https://github.com/firedrakeproject/firedrake/tree/master/python/evtk **Optional keywords**: - *vtk_filename* = string : filename of VTK file (default: output_name) - *data* = np.array : data array to export to VKT (default: entire block model) """ vtk_filename = "noddyFunct2" extent_x = 10 extent_y = 10 extent_z = 10 delx = 0.2 dely = 0.2 delz = 0.2 from pyevtk.hl import gridToVTK # Coordinates x = np.arange(0, extent_x + 0.1*delx, delx, dtype='float64') y = np.arange(0, extent_y + 0.1*dely, dely, dtype='float64') z = np.arange(0, extent_z + 0.1*delz, delz, dtype='float64') # self.block = np.swapaxes(self.block, 0, 2) gridToVTK(vtk_filename, x, y, z, cellData = {"geology" : sol}) # ## Performance Analysis # One of the advantages of theano is the posibility to create a full profile of the function. This has to be included in at the time of the creation of the function. At the moment it should be active (the downside is larger compilation time and I think also a bit in the computation so be careful if you need a fast call) # ### CPU # The following profile is with a 2 core laptop. Nothing spectacular. # Looking at the profile we can see that most of time is in pow operation (exponential). This probably is that the extent is huge and we are doing it with too much precision. I am working on it # ### GPU # In[16]: get_ipython().run_cell_magic('timeit', '', '\n# Compute the block\nGeMpy.compute_block_model(geo_data, [0,1,2], verbose = 0)\n') # In[17]: geo_data.interpolator._interpolate.profile.summary() # In[ ]: