#!/usr/bin/env python
# coding: utf-8

# # Introduction

# This IPython notebook illustrates how to remove features from a feature table.
# First, we need to import the py_entitymatching package and other libraries as follows:

# In[1]:


# Import py_entitymatching package
import py_entitymatching as em
import os
import pandas as pd


# Then, read the (sample) input tables for blocking purposes

# In[2]:


# Locate the 'datasets' directory under the py_entitymatching install path.
# os.path.join inserts the platform separator itself and does not double it
# when a component already ends with one.
datasets_dir = os.path.join(em.get_install_path(), 'datasets')

# Build the paths of the two input tables
path_A = os.path.join(datasets_dir, 'person_table_A.csv')
path_B = os.path.join(datasets_dir, 'person_table_B.csv')


# In[3]:


# Load each CSV file as a table, declaring 'ID' as its key attribute
A, B = (
    em.read_csv_metadata(csv_path, key='ID')
    for csv_path in (path_A, path_B)
)


# In[4]:


# Generate the feature table used for blocking from tables A and B
feature_table = em.get_features_for_blocking(
    A,
    B,
    validate_inferred_attr_types=False,
)
# Alternative: generate the features for matching instead
# feature_table = em.get_features_for_matching(A, B)


# # Removing Features from Feature Table

# In[5]:


# Show what kind of object the feature table is
# (the cells below treat it like a pandas DataFrame: head(), drop(), boolean indexing)
type(feature_table)


# In[6]:


# Preview the top rows of the feature table before any feature is removed
feature_table.head()


# In[7]:


# Drop the first row, i.e. the row whose index label is 0.
# errors='ignore' keeps this cell re-runnable: once the row is gone, a plain
# drop(0) would raise a KeyError because the label no longer exists.
feature_table = feature_table.drop(0, errors='ignore')


# In[8]:


# Preview the table again to confirm the row was removed
feature_table.head()


# In[9]:


# Keep only the features whose left attribute is 'name'; all other features are removed
feature_table = feature_table.loc[feature_table['left_attribute'] == 'name']


# In[10]:


# Display the remaining features
feature_table


# In[11]:


# Keep only the features whose similarity function is 'jaccard'; all other features are removed
feature_table = feature_table.loc[feature_table['simfunction'] == 'jaccard']


# In[12]:


# Display the remaining features
feature_table