#!/usr/bin/env python
# coding: utf-8

# This example makes use of the Google/OpenRefine client that can be found at
# https://github.com/PaulMakepeace/refine-client-py
#
# The example is inspired by, and draws heavily from, Trevor Muñoz's tutorial
# posted at http://nbviewer.ipython.org/gist/trevormunoz/6265360
#
# ---
# First we need to import some necessary libraries...
#
# NOTE: this file is a notebook export. The get_ipython() calls below only
# work when it is run inside IPython/Jupyter, not as a plain Python script.

# In[4]:

# Install the client straight from its GitHub repository.
get_ipython().system('~/anaconda/bin/pip install git+https://github.com/PaulMakepeace/refine-client-py.git')


# In[10]:

# Install the package named `refine-client` via pip.
get_ipython().system('~/anaconda/bin/pip install refine-client')


# In[5]:

from google.refine import refine, facet


# Make sure you have OpenRefine running locally before going any further!
# (You can download and install it from [OpenRefine.org](http://openrefine.org).)
#
# Once you've started OpenRefine in the normal way, get a link to the server...

# In[8]:

server = refine.RefineServer()
orefine = refine.Refine(server)


# Let's check everything's working by previewing three examples from the list
# of any projects that have previously been created...

# In[9]:

# Wrap in list() so the slice also works where .items() returns a view
# (Python 3) rather than a list (Python 2).
list(orefine.list_projects().items())[:3]


# To have a project to work with, we create a new one from a local file:

# In[5]:

project_file = 'smalltest.csv'


# In[17]:

p = orefine.new_project(project_file=project_file)


# In[25]:

# Fetch the first ten rows of the project.
pr = p.get_rows(limit=10)


# In[14]:

# Explore what a single row object exposes.
dir(pr.rows[0])


# In[29]:

pr.rows[0].row


# In[32]:

# Build a text facet on the 'CompanyName' column.
f = facet.TextFacet('CompanyName')


# In[33]:

dir(f)


# In[42]:

fr = p.compute_facets(facet.TextFacet('CompanyName'))


# In[43]:

facets = fr.facets[0]
# List the facet choices from most to least frequent.
for k in sorted(facets.choices, key=lambda k: facets.choices[k].count, reverse=True):
    # %-formatting with a single print argument produces the same
    # "<count> <choice>" output on both Python 2 and Python 3.
    print("%s %s" % (facets.choices[k].count, k))


# In[50]:

dir(pr.rows.rows_response)
pr.rows.rows_response


# In[53]:

dir(pr)


# In[54]:

dir(p)


# In[55]:

p.columns


# In[ ]: