#!/usr/bin/env python
# coding: utf-8

# # 2017-08-18 Building the Data Download Function

# ## Function (Including Packages)
#
#

# In[1]:

# Define a function to download the data if not already downloaded, and then
# read it in.

import os
from urllib.request import urlretrieve

import pandas as pd
from pandas import DataFrame, Series

# pandas_datareader is not used by anything visible in this file; the import is
# kept for notebook cells that may rely on it, but made optional so the
# download helper below stays usable when the package is not installed.
try:
    import pandas_datareader
except ImportError:
    pandas_datareader = None


def getdata_read_or_download(filename, source_URL, force_download=False):
    """Read a CSV file into a DataFrame, downloading it first if needed.

    The file is fetched from ``source_URL`` when ``filename`` does not exist
    yet, or when ``force_download`` is truthy (for example because the local
    copy is corrupt or out of date). Otherwise the existing local file is
    read as-is and ``source_URL`` is never contacted.

    The download is written to a sibling ``<filename>.part`` file and only
    moved over ``filename`` once it has completed, so a failed or interrupted
    download neither leaves a truncated file behind nor destroys an existing
    good copy.

    Parameters
    ----------
    filename : str
        Location of the already-downloaded data (relative paths are resolved
        against the current working directory).
    source_URL : str
        Location of the data on the internet.
    force_download : bool, optional
        If True, re-download the data even when ``filename`` already exists.

    Returns
    -------
    dataframe : pandas.DataFrame
        The contents of ``filename`` as parsed by ``pandas.read_csv``.

    Raises
    ------
    Exception
        Whatever ``urlretrieve``, ``os.replace`` or ``pandas.read_csv`` raise
        is propagated unchanged.
    """
    if force_download or not os.path.exists(filename):
        partial_path = str(filename) + ".part"
        try:
            urlretrieve(source_URL, partial_path)
            # Same-directory rename: the finished download replaces any old
            # copy in a single step.
            os.replace(partial_path, filename)
        finally:
            # Only still present when the download or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    return pd.read_csv(filename)


# The notebook cells below perform shell and network I/O, so they run only
# when this file is executed directly, not when the function is imported.
if __name__ == "__main__":
    # In[12]:

    # NOTE(review): get_ipython() is only available when run under IPython.
    # The `cd` runs in its own subshell, so it presumably does not affect the
    # following `pwd` -- confirm whether `%cd` was intended.
    get_ipython().system('cd getdata_read_or_download')
    get_ipython().system(' pwd')

    # ## Tests:

    # In[2]:

    # Test: read in the 2014 data from the web

    ccuds_pandp_data = getdata_read_or_download(
        filename="pandp.csv",
        source_URL="http://delong.typepad.com/2017-08-15-distance-to-frontier-2014-3.csv",
    )

    # ccuds_pandp_data = pd.read_csv(
    #     'http://delong.typepad.com/2017-08-15-distance-to-frontier-2014-3.csv')

    # In[3]:

    # Test: check to see if the data is in a comprehensible format...

    ccuds_pandp_data.info()

    # In[4]:

    # Test: check to see if the data looks like the right data...
    # (In a notebook this bare expression displays the frame; in a script it
    # is a no-op.)

    ccuds_pandp_data