#!/usr/bin/env python
# coding: utf-8

# # Introduction

# In[1]:


# Import py_entitymatching along with os, pandas, and pandas_profiling
import py_entitymatching as em
import os
import pandas as pd
import pandas_profiling


# Then, read the (sample) input table

# In[2]:


# Locate the 'datasets' directory inside the py_entitymatching install path.
# os.path.join inserts the platform separator, so no manual os.sep handling
# is needed.
datasets_dir = os.path.join(em.get_install_path(), 'datasets')

# Path of the single input table used below
path_A = os.path.join(datasets_dir, 'dblp_demo.csv')


# In[4]:


# Read the CSV file at path_A into table A, passing 'id' (lowercase) as the
# key attribute
A = em.read_csv_metadata(path_A, key='id')
# Preview A; the result is not assigned, so this is for display only
A.head()


# # Data Profiling

# In[5]:


# Build a profiling report for table A. The report object is not bound to a
# name; as a bare expression it is presumably meant to be rendered as the
# notebook cell's output.
pandas_profiling.ProfileReport(A)


# ## Saving the Data Profiling Report to an HTML File

# In[6]:


# Build the profiling report for A again, this time keeping a reference to it
pfr = pandas_profiling.ProfileReport(A)
# Write the report to a hard-coded location.
# NOTE(review): "/tmp" assumes a POSIX-style temp directory -- confirm this
# path is valid on the target machine.
pfr.to_file("/tmp/example.html")


# In[7]:


# Bare reference to the report object assigned to pfr earlier; presumably
# rendered as the notebook cell's output.
pfr


# In[ ]: