#!/usr/bin/env python
# coding: utf-8

# # datatable_demo
#
# This notebook demonstrates the use of the DataTable object in the ukds
# package.
#
# The example dataset used throughout is: Gershuny, J., Sullivan, O. (2017).
# United Kingdom Time Use Survey, 2014-2015. Centre for Time Use Research,
# University of Oxford. [data collection]. UK Data Service. SN: 8128,
# http://doi.org/10.5255/UKDA-SN-8128-1

# ## Import the ukds package
#
# This demonstration uses the `ukds` package, which is available on PyPI.

# In[1]:

import ukds


# ## Set up a filepath to a .tab data table file
#
# The filepath to the data table under study is specified here.
# This can be changed as needed.

# In[2]:

# Adjacent raw-string literals are joined into a single path string.
fp_tab = (
    r'C:\Users\cvskf\OneDrive - Loughborough University\_Data\United_Kingdom_Time_Use_Survey_2014-2015'
    r'\UKDA-8128-tab\tab\uktus15_household.tab'
)


# ## Set up a filepath to a UKDS .rtf data dictionary file
#
# The filepath to the associated data dictionary is specified here.
# This can be changed as needed.

# In[3]:

fp_dd = (
    r'C:\Users\cvskf\OneDrive - Loughborough University\_Data\United_Kingdom_Time_Use_Survey_2014-2015'
    r'\UKDA-8128-tab\mrdoc\allissue\uktus15_household_ukda_data_dictionary.rtf'
)


# ## Create a DataTable object
#
# A DataTable object is created. The two filepaths are supplied as arguments
# and the files are read into the DataTable object.

# In[4]:

dt = ukds.DataTable(fp_tab, fp_dd)
print(dt.__doc__)
print(dt)


# The data table .tab file is stored in the `tab` attribute as a pandas
# DataFrame:

# In[5]:

dt.tab.head()


# The data dictionary .rtf file is stored in the `datadictionary` attribute
# as a ukds.DataDictionary object:

# In[6]:

dt.datadictionary


# ## Get dataframe
#
# The information in the `tab` and `datadictionary` attributes can be
# combined by the `get_dataframe` method.
#
# This method returns a new pandas DataFrame in which:
# - the columns are a multi-level index which holds the data dictionary
#   information
# - the table values are converted from numerical values to the label values,
#   where applicable

# In[7]:

df = dt.get_dataframe()
df.head()


# In[ ]: