#!/usr/bin/env python
# coding: utf-8

# # Introduction
#
# Read the sample `dblp_demo.csv` table from py_entitymatching's installed
# datasets directory, profile it with pandas_profiling, and save the
# profiling report as an HTML file.

# In[1]:

import os

# Import py_entitymatching package
import py_entitymatching as em
import pandas as pd
import pandas_profiling

# Then, read the (sample) input table

# In[2]:

# Get the datasets directory
datasets_dir = os.path.join(em.get_install_path(), 'datasets')

# Get the path of the input table
path_A = os.path.join(datasets_dir, 'dblp_demo.csv')

# In[4]:

# Read the CSV file and set 'id' as the key attribute
A = em.read_csv_metadata(path_A, key='id')
# Bare expression: shows the first rows in a notebook, no effect in a script.
A.head()

# # Data Profiling

# In[5]:

# Build the report once and reuse the same object for display and export,
# instead of constructing a second ProfileReport for the same table.
pfr = pandas_profiling.ProfileReport(A)
pfr

# ## Saving the Data Profiling Report to an HTML File

# In[6]:

pfr.to_file("/tmp/example.html")

# In[7]:

pfr

# In[ ]: