#!/usr/bin/env python
# coding: utf-8

# # Introduction

# In[1]:


# Import py_entitymatching package
import py_entitymatching as em
import os
import pandas as pd


# Then, read the (sample) input tables

# In[2]:


# Locate the 'datasets' directory under the py_entitymatching install path.
# os.path.join is used instead of manual os.sep concatenation so that a
# trailing separator on the install path cannot produce a doubled separator.
datasets_dir = os.path.join(em.get_install_path(), 'datasets')

# Path of the input table (dblp_demo.csv) inside the datasets directory
path = os.path.join(datasets_dir, 'dblp_demo.csv')


# In[3]:


# Read the CSV file twice, setting 'id' as the key attribute of each table.
# A is used for the OpenRefine demo and B for the pandastable demo below.
A = em.read_csv_metadata(path, key='id')
B = em.read_csv_metadata(path, key='id')
# Preview the first rows of A
A.head()


# # Data Exploration
# 
# This notebook demonstrates two different data exploration tools. OpenRefine is supported for Python 2.7 and 3.5, whereas PandasTable is only supported for Python 3.5.

# ## OpenRefine

# In[4]:


# Invoke the OpenRefine GUI for data exploration of table A.
# 'Table' is passed as the name argument (presumably the OpenRefine project name — confirm).
# The returned object is kept in `p` so the project can be exported back to a DataFrame.
p = em.data_explore_openrefine(A, name='Table')


# In[5]:


# Export the OpenRefine project back into a DataFrame, replacing the original A.
# Note: after export_pandas_frame() is called, the OpenRefine project is deleted automatically,
# so re-running this cell presumably requires re-creating `p` first (confirm).
A = p.export_pandas_frame()


# In[6]:


# Preview the first rows of A as exported from OpenRefine
A.head()


# ## Pandastable

# In[7]:


# Invoke the pandastable GUI for data exploration of table B.
# This call blocks until the GUI window is closed.
em.data_explore_pandastable(B)


# In[8]:


# Preview the first rows of B after the pandastable session
# (presumably reflects any edits made in the GUI — confirm)
B.head()


# In[ ]: