#!/usr/bin/env python
# coding: utf-8

# Contents
# ===
# - Introduction
# - Removing Features

# # Introduction
# This IPython notebook illustrates how to remove features from a feature
# table. First, import the py_entitymatching package and the other libraries
# used below.

# In[1]:

# Import py_entitymatching package
import py_entitymatching as em
import os
import pandas as pd

# Then, read the (sample) input tables for blocking purposes.

# In[3]:

# Directory holding the sample datasets, located under the package's
# install path.
datasets_dir = os.sep.join([em.get_install_path(), 'datasets'])

# Full paths of the two input tables.
path_A = os.sep.join([datasets_dir, 'person_table_A.csv'])
path_B = os.sep.join([datasets_dir, 'person_table_B.csv'])

# In[4]:

# Load both CSV files, declaring 'ID' as the key attribute of each table.
A = em.read_csv_metadata(path_A, key='ID')
B = em.read_csv_metadata(path_B, key='ID')

# In[5]:

# Generate the feature table for blocking from tables A and B.
feature_table = em.get_features_for_blocking(A, B)

# # Removing Features from Feature Table

# In[6]:

# Inspect the type of the generated feature table.
type(feature_table)

# In[9]:

feature_table.head()

# In[11]:

# Drop the row whose index label is 0 -- the first row, assuming the table
# still carries its default integer index.
feature_table = feature_table.drop(0)

# In[12]:

feature_table.head()

# In[15]:

# Keep only the features whose left attribute is 'name'; every other
# feature is removed.
left_is_name = feature_table['left_attribute'] == 'name'
feature_table = feature_table[left_is_name]

# In[14]:

feature_table

# In[16]:

# Keep only the features whose similarity function is 'jaccard'; every
# other feature is removed.
uses_jaccard = feature_table['simfunction'] == 'jaccard'
feature_table = feature_table[uses_jaccard]

# In[17]:

feature_table