#!pip install pandas pyfim==6.28
"""Mine association rules from a toy basket dataset with pyfim.

Runs ``fim.arules`` on a small list of transactions and loads the
resulting rules into a pandas DataFrame with readable column names.
For more details on pyfim visit: https://borgelt.net/pyfim.html
"""

import pandas as pd
from fim import arules

# display docs (IPython/Jupyter only -- uncomment inside a notebook)
# ??arules

# inputs
supp = 2  # minimum support of an assoc. rule (default: 10)
conf = 50  # minimum confidence of an assoc. rule (default: 80%)
report = 'asC'  # one character per value reported for each rule

dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],
    ['Milk', 'Unicorn', 'Eggs', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Yogurt', 'Eggs'],
    ['Corn', 'Yogurt', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],
]

# make dict for nicer looking column names
# (keys are the single-character codes accepted by the `report` argument)
report_colnames = {
    'a': 'support_itemset_absolute',
    's': 'support_itemset_relative',
    'S': 'support_itemset_relative_pct',
    'b': 'support_bodyset_absolute',
    'x': 'support_bodyset_relative',
    'X': 'support_bodyset_relative_pct',
    'h': 'support_headitem_absolute',
    'y': 'support_headitem_relative',
    'Y': 'support_headitem_relative_pct',
    'c': 'confidence',
    'C': 'confidence_pct',
    'l': 'lift',
    'L': 'lift_pct',
    'e': 'evaluation',
    'E': 'evaluation_pct',
    # 'Q' is the empty-set support; it was previously labelled 'xx' while a
    # duplicate 'S' key carried this name and clobbered the real 'S' entry.
    'Q': 'support_emptyset',
}

# run apriori
result = arules(dataset, supp=supp, conf=conf, report=report)

# make df of results
# each rule starts with the head (consequent) and body (antecedent),
# followed by one value per character in `report`; unknown codes keep
# their raw character as the column name
colnames = ['consequent', 'antecedent'] + [
    report_colnames.get(code, code) for code in report
]
df_rules = pd.DataFrame(result, columns=colnames)
# NOTE: sorting on this column requires 'a' to be part of `report`
df_rules = df_rules.sort_values('support_itemset_absolute', ascending=False)
print(df_rules.shape)

# look at some higher support rules
df_rules.head(10)

# look at some lower support rules
df_rules.tail(10)