#!/usr/bin/env python
# coding: utf-8

# # Introduction
#
# This IPython notebook illustrates how to remove features from a feature
# table. First, we import the py_entitymatching package and the other
# libraries used below.

# In[1]:

# Import py_entitymatching package
import py_entitymatching as em
import os
import pandas as pd

# Then, read the (sample) input tables that the features are generated from.

# In[2]:

# Get the datasets directory that ships inside the installed package
datasets_dir = os.path.join(em.get_install_path(), 'datasets')

# Get the paths of the input tables
path_A = os.path.join(datasets_dir, 'person_table_A.csv')
path_B = os.path.join(datasets_dir, 'person_table_B.csv')

# In[3]:

# Read the CSV files and set 'ID' as the key attribute
A = em.read_csv_metadata(path_A, key='ID')
B = em.read_csv_metadata(path_B, key='ID')

# In[4]:

# Get features (for blocking)
feature_table = em.get_features_for_blocking(A, B, validate_inferred_attr_types=False)

# Alternative: get features (for matching) instead
# feature_table = em.get_features_for_matching(A, B)

# # Removing Features from Feature Table

# In[5]:

# NOTE: bare expressions such as the one below only display a value when run
# as a cell in an interactive IPython/Jupyter session; as a script they are
# evaluated and discarded.
type(feature_table)

# In[6]:

feature_table.head()

# In[7]:

# Drop the row whose index label is 0 (the first row, assuming the feature
# table carries a default integer index -- confirm if the index was changed).
feature_table = feature_table.drop(0)

# In[8]:

feature_table.head()

# In[9]:

# Keep only the features whose left attribute is 'name'
# (i.e. remove every feature that does not involve name).
feature_table = feature_table[feature_table.left_attribute == 'name']

# In[10]:

feature_table

# In[11]:

# Keep only the features whose similarity function is 'jaccard'
# (i.e. remove every feature that does not use jaccard).
feature_table = feature_table[feature_table.simfunction == 'jaccard']

# In[12]:

feature_table